Passed
Push — master ( 6eaa7f...c08b92 )
by Lars
04:10
created

UTF8   F

Complexity

Total Complexity 1781

Size/Duplication

Total Lines 14800
Duplicated Lines 0 %

Test Coverage

Coverage 81.54%

Importance

Changes 110
Bugs 53 Features 6
Metric Value
eloc 4526
c 110
b 53
f 6
dl 0
loc 14800
ccs 3199
cts 3923
cp 0.8154
rs 0.8
wmc 1781

310 Methods

Rating   Name   Duplication   Size   Complexity  
A add_bom_to_string() 0 7 2
A array_change_key_case() 0 23 5
A __construct() 0 2 1
A char_at() 0 7 2
A chars() 0 4 1
A access() 0 11 4
A callback() 0 3 1
A binary_to_str() 0 12 3
A bom() 0 3 1
A str_begins() 0 3 1
A rtrim() 0 27 5
A spaces_to_tabs() 0 11 3
A showSupport() 0 17 3
A single_chr_html_encode() 0 18 4
A str_capitalize_name() 0 8 1
A split() 0 7 1
B chr_to_decimal() 0 38 8
A ctype_loaded() 0 3 1
D chr() 0 109 19
A chr_to_int() 0 3 1
A decode_mimeheader() 0 8 3
A chunk_split() 0 3 1
A css_identifier() 0 56 6
A css_stripe_media_queries() 0 6 1
A clean() 0 48 6
A decimal_to_chr() 0 3 1
A codepoints() 0 36 5
A chr_map() 0 5 1
A cleanup() 0 24 2
A checkForSupport() 0 48 4
A chr_to_hex() 0 11 3
A collapse_whitespace() 0 8 2
A emoji_from_country_code() 0 17 3
A str_substr_after_first_separator() 0 28 6
B str_obfuscate() 0 47 8
A str_upper_camelize() 0 8 1
A str_transliterate() 0 6 1
D str_truncate_safe() 0 86 18
A str_underscored() 0 3 1
A str_surround() 0 3 1
A str_starts_with_any() 0 17 5
A str_word_count() 0 23 5
A str_to_binary() 0 10 2
A chr_size_list() 0 17 3
A rawurldecode() 0 35 4
A count_chars() 0 11 1
B between() 0 48 8
A file_has_bom() 0 8 2
A max() 0 14 3
B str_camelize() 0 74 10
A parse_str() 0 18 4
A filter_input() 0 16 3
A str_contains() 0 15 3
B str_to_lines() 0 29 8
A substr_in_byte() 0 18 6
A stripos_in_byte() 0 12 4
A get_unique_string() 0 22 3
A is_bom() 0 10 3
A is_hexadecimal() 0 8 2
A strnatcasecmp() 0 5 1
A encode_mimeheader() 0 26 5
A substr_left() 0 15 4
D strlen() 0 104 19
A str_isubstr_last() 0 25 4
A to_int() 0 7 2
A str_replace_beginning() 0 25 6
A has_uppercase() 0 8 2
A remove_left() 0 28 4
C stripos() 0 67 14
A str_offset_exists() 0 10 2
D strrchr() 0 104 20
A to_filename() 0 9 1
A str_iends_with() 0 11 3
A max_chr_width() 0 8 2
A isBinary() 0 3 1
C utf8_decode() 0 61 13
A ltrim() 0 27 5
A emoji_decode() 0 21 3
A is_utf8() 0 13 4
A remove_html() 0 3 1
B str_longest_common_suffix() 0 54 10
C wordwrap() 0 73 15
B ucfirst() 0 57 7
A lcword() 0 13 1
A str_pad_both() 0 12 1
A str_index_last() 0 11 1
A str_substr_last() 0 33 6
A mbstring_loaded() 0 3 1
A str_limit() 0 26 6
A html_escape() 0 6 1
A toUTF8() 0 3 1
A string() 0 16 4
D normalize_encoding() 0 147 16
B rxClass() 0 44 8
B get_file_type() 0 65 7
A str_ensure_right() 0 13 4
B str_titleize_for_humans() 0 171 7
C is_utf16() 0 71 16
A isHtml() 0 3 1
C filter() 0 59 14
A normalize_whitespace() 0 11 1
A str_starts_with() 0 16 4
A isBase64() 0 3 1
A str_humanize() 0 15 1
A is_html() 0 14 2
C substr_count_in_byte() 0 55 15
A html_decode() 0 6 1
A strchr() 0 13 1
A strichr() 0 13 1
A isUtf32() 0 3 1
A str_index_first() 0 11 1
A strlen_in_byte() 0 12 3
A str_ireplace_ending() 0 21 6
C str_longest_common_substring() 0 76 16
A regex_replace() 0 20 3
A titlecase() 0 35 5
A getData() 0 6 1
A str_iindex_first() 0 11 1
B strtolower() 0 60 10
A urldecode() 0 35 4
A str_isubstr_before_first_separator() 0 19 5
B strrev() 0 43 10
A replace_all() 0 11 2
D substr_replace() 0 125 27
A removeBOM() 0 3 1
A strstr_in_byte() 0 15 4
A emoji_encode() 0 21 3
A str_matches_pattern() 0 3 1
A is_alpha() 0 8 2
C str_titleize() 0 69 12
A str_split_array() 0 17 2
A ws() 0 3 1
B get_random_string() 0 56 10
A str_replace_first() 0 20 2
A fix_utf8() 0 30 4
A toLatin1() 0 3 1
A str_pad_right() 0 12 1
B ucwords() 0 51 9
A first_char() 0 14 4
A to_boolean() 0 35 5
C stristr() 0 79 17
A isUtf8() 0 3 1
A strncasecmp() 0 10 1
B strwidth() 0 43 8
A str_iends() 0 3 1
A trim() 0 27 5
A is_serialized() 0 11 3
A is_uppercase() 0 8 2
A substr_compare() 0 33 6
D substr_count() 0 73 17
A strnatcmp() 0 9 2
D str_pad() 0 146 16
A urldecode_unicode_helper() 0 12 3
A str_ireplace() 0 31 5
A to_latin1() 0 3 1
A str_replace_ending() 0 24 6
A string_has_bom() 0 10 3
B strtr() 0 42 11
B str_contains_all() 0 24 9
A is_ascii() 0 3 1
A normalize_line_ending() 0 3 1
A str_isubstr_after_last_separator() 0 26 5
D range() 0 72 23
B strspn() 0 30 10
A strcasecmp() 0 21 1
A str_ends() 0 3 1
B str_capitalize_name_helper() 0 86 10
A utf8_encode() 0 16 3
A normalize_msword() 0 3 1
C str_detect_encoding() 0 111 13
A str_istarts_with() 0 11 3
A is_blank() 0 8 2
A str_replace() 0 18 1
A substr_iright() 0 15 4
D getCharDirection() 0 105 118
A htmlspecialchars() 0 15 3
A replace() 0 11 2
A filter_var_array() 0 15 2
A to_iso8859() 0 16 4
A has_whitespace() 0 8 2
A words_limit() 0 20 5
A strip_tags() 0 18 4
A pcre_utf8_support() 0 4 1
A str_isubstr_before_last_separator() 0 24 6
A substr_right() 0 31 6
A lowerCaseFirst() 0 13 1
D str_split() 0 135 29
A str_ends_with_any() 0 13 4
A strrpos_in_byte() 0 12 4
A remove_right() 0 25 4
F strrpos() 0 136 31
A remove_html_breaks() 0 3 1
A remove_invisible_characters() 0 11 1
A str_replace_last() 0 19 2
A str_iindex_last() 0 11 1
A str_substr_before_last_separator() 0 31 6
B is_binary() 0 38 9
A intlChar_loaded() 0 3 1
B strtocasefold() 0 33 7
A lcfirst() 0 44 5
A tabs_to_spaces() 0 11 3
B is_url() 0 44 7
A finfo_loaded() 0 3 1
B str_truncate() 0 44 7
F strripos() 0 113 25
A strpos_in_byte() 0 12 4
A str_ends_with() 0 16 4
A fits_inside() 0 3 1
A to_ascii() 0 6 1
A is_binary_file() 0 16 4
A intl_loaded() 0 3 1
A reduce_string_array() 0 29 6
B str_longest_common_prefix() 0 51 8
A mbstring_overloaded() 0 11 2
A str_pad_left() 0 12 1
A str_substr_first() 0 33 6
A html_stripe_empty_tags() 0 6 1
A remove_bom() 0 22 5
A str_repeat() 0 5 1
A strpbrk() 0 11 4
D to_utf8_string() 0 110 33
A whitespace_table() 0 3 1
A substr_count_simple() 0 31 6
A str_iends_with_any() 0 13 4
A str_substr_after_last_separator() 0 31 6
A str_isubstr_after_first_separator() 0 26 5
F extract_text() 0 175 34
A json_loaded() 0 3 1
A isBom() 0 3 1
B str_snakeize() 0 57 6
A int_to_chr() 0 3 1
A is_lowercase() 0 8 2
A str_sort() 0 15 3
A to_utf8() 0 15 3
A ucword() 0 6 1
A str_offset_get() 0 14 4
A strip_whitespace() 0 7 2
A hasBom() 0 3 1
A toAscii() 0 6 1
A str_ibegins() 0 3 1
B str_limit_after_word() 0 53 11
A iconv_loaded() 0 3 1
A lcwords() 0 34 6
A str_upper_first() 0 13 1
A isAscii() 0 3 1
A normalizeEncoding() 0 3 1
A swapCase() 0 17 4
A filter_var() 0 15 2
A substr_ileft() 0 15 4
A is_empty() 0 3 1
B html_encode() 0 54 11
A str_dasherize() 0 3 1
A isUtf16() 0 3 1
A str_ensure_left() 0 11 3
F encode() 0 146 37
B urldecode_fix_win1252_chars() 0 227 1
A toIso8859() 0 3 1
C is_utf32() 0 71 16
C ord() 0 70 16
B to_string() 0 33 8
A is_alphanumeric() 0 8 2
A strtonatfold() 0 12 2
A json_decode() 0 14 2
A fix_simple_utf8() 0 32 4
C strcspn() 0 49 12
A is_printable() 0 3 1
B is_json() 0 27 8
A fixStrCaseHelper() 0 41 5
A int_to_hex() 0 7 2
C str_split_pattern() 0 56 13
D strstr() 0 107 21
A has_lowercase() 0 8 2
A json_encode() 0 10 2
A str_isubstr_first() 0 25 4
A is_base64() 0 17 5
A str_last_char() 0 16 4
A str_ireplace_beginning() 0 22 6
A hex_to_int() 0 14 3
A htmlentities() 0 28 3
A hex_to_chr() 0 4 1
A str_substr_before_first_separator() 0 32 6
F substr() 0 137 31
A isJson() 0 3 1
A wordwrap_per_line() 0 28 5
A strncmp() 0 19 4
A filter_input_array() 0 15 3
A str_insert() 0 28 4
A getSupportInfo() 0 13 3
A utf8_fix_win1252_chars() 0 3 1
A replace_diamond_question_mark() 0 43 5
D is_utf8_string() 0 134 28
A to_utf8_convert_helper() 0 28 5
B str_delimit() 0 33 8
B strtoupper() 0 60 10
A min() 0 14 3
A is_punctuation() 0 3 1
C html_entity_decode() 0 59 13
B strrichr() 0 54 11
A str_istarts_with_any() 0 17 5
B str_contains_any() 0 29 8
A initEmojiData() 0 29 4
A remove_duplicates() 0 16 4
B str_slice() 0 33 10
F strpos() 0 151 33
A str_shuffle() 0 35 6
A strcmp() 0 11 2
B file_get_contents() 0 56 11
A strripos_in_byte() 0 12 4
A symfony_polyfill_used() 0 16 5
B str_to_words() 0 36 8

How to fix   Complexity   

Complex Class

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * @psalm-immutable
9
 */
10
final class UTF8
11
{
12
    /**
13
     * (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
14
     * This regular expression is a work around for http://bugs.exim.org/1279
15
     *
16
     * @deprecated <p>please don't use it anymore</p>
17
     */
18
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
19
20
    /**
     * Bom => Byte-Length
     *
     * Every key is a byte order mark (either the raw bytes, or the same bytes after they were
     * wrongly read as "WINDOWS-1252" and stored as UTF-8); the value is the key's length in bytes.
     *
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
     *
     * @var array<string, int>
     */
    private static $BOM = [
        "\xef\xbb\xbf"     => 3, // UTF-8 BOM
        // written with escapes (i-diaeresis, right guillemet, inverted question mark) so that
        // the three characters cannot silently get lost again when the file is re-encoded
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
        "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
        '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
        'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"         => 2, // UTF-16 (BE) BOM
        'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"         => 2, // UTF-16 (LE) BOM
        'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
39
40
    /**
41
     * Numeric code point => UTF-8 Character
42
     *
43
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
44
     *
45
     * @var array<int, string>
46
     */
47
    private static $WHITESPACE = [
48
        // NULL Byte
49
        0 => "\x0",
50
        // Tab
51
        9 => "\x9",
52
        // New Line
53
        10 => "\xa",
54
        // Vertical Tab
55
        11 => "\xb",
56
        // Carriage Return
57
        13 => "\xd",
58
        // Ordinary Space
59
        32 => "\x20",
60
        // NO-BREAK SPACE
61
        160 => "\xc2\xa0",
62
        // OGHAM SPACE MARK
63
        5760 => "\xe1\x9a\x80",
64
        // MONGOLIAN VOWEL SEPARATOR
65
        6158 => "\xe1\xa0\x8e",
66
        // EN QUAD
67
        8192 => "\xe2\x80\x80",
68
        // EM QUAD
69
        8193 => "\xe2\x80\x81",
70
        // EN SPACE
71
        8194 => "\xe2\x80\x82",
72
        // EM SPACE
73
        8195 => "\xe2\x80\x83",
74
        // THREE-PER-EM SPACE
75
        8196 => "\xe2\x80\x84",
76
        // FOUR-PER-EM SPACE
77
        8197 => "\xe2\x80\x85",
78
        // SIX-PER-EM SPACE
79
        8198 => "\xe2\x80\x86",
80
        // FIGURE SPACE
81
        8199 => "\xe2\x80\x87",
82
        // PUNCTUATION SPACE
83
        8200 => "\xe2\x80\x88",
84
        // THIN SPACE
85
        8201 => "\xe2\x80\x89",
86
        // HAIR SPACE
87
        8202 => "\xe2\x80\x8a",
88
        // LINE SEPARATOR
89
        8232 => "\xe2\x80\xa8",
90
        // PARAGRAPH SEPARATOR
91
        8233 => "\xe2\x80\xa9",
92
        // NARROW NO-BREAK SPACE
93
        8239 => "\xe2\x80\xaf",
94
        // MEDIUM MATHEMATICAL SPACE
95
        8287 => "\xe2\x81\x9f",
96
        // HALFWIDTH HANGUL FILLER
97
        65440 => "\xef\xbe\xa0",
98
        // IDEOGRAPHIC SPACE
99
        12288 => "\xe3\x80\x80",
100
    ];
101
102
    /**
103
     * @var array<string, string>
104
     */
105
    private static $WHITESPACE_TABLE = [
106
        'SPACE'                     => "\x20",
107
        'NO-BREAK SPACE'            => "\xc2\xa0",
108
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
109
        'EN QUAD'                   => "\xe2\x80\x80",
110
        'EM QUAD'                   => "\xe2\x80\x81",
111
        'EN SPACE'                  => "\xe2\x80\x82",
112
        'EM SPACE'                  => "\xe2\x80\x83",
113
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
114
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
115
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
116
        'FIGURE SPACE'              => "\xe2\x80\x87",
117
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
118
        'THIN SPACE'                => "\xe2\x80\x89",
119
        'HAIR SPACE'                => "\xe2\x80\x8a",
120
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
121
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
122
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
123
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
124
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
125
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
126
        'HALFWIDTH HANGUL FILLER'   => "\xef\xbe\xa0",
127
    ];
128
129
    /**
130
     * @var array
131
     *
132
     * @phpstan-var array{upper: string[], lower: string[]}
133
     */
134
    private static $COMMON_CASE_FOLD = [
135
        'upper' => [
136
            'µ',
137
            'ſ',
138
            "\xCD\x85",
139
            'ς',
140
            'ẞ',
141
            "\xCF\x90",
142
            "\xCF\x91",
143
            "\xCF\x95",
144
            "\xCF\x96",
145
            "\xCF\xB0",
146
            "\xCF\xB1",
147
            "\xCF\xB5",
148
            "\xE1\xBA\x9B",
149
            "\xE1\xBE\xBE",
150
        ],
151
        'lower' => [
152
            'μ',
153
            's',
154
            'ι',
155
            'σ',
156
            'ß',
157
            'β',
158
            'θ',
159
            'φ',
160
            'π',
161
            'κ',
162
            'ρ',
163
            'ε',
164
            "\xE1\xB9\xA1",
165
            'ι',
166
        ],
167
    ];
168
169
    /**
170
     * @var array
171
     *
172
     * @phpstan-var array<string, mixed>
173
     */
174
    private static $SUPPORT = [];
175
176
    /**
177
     * @var string[]|null
178
     *
179
     * @phpstan-var array<string, string>|null
180
     */
181
    private static $BROKEN_UTF8_FIX;
182
183
    /**
184
     * @var string[]|null
185
     *
186
     * @phpstan-var array<int, string>|null
187
     */
188
    private static $WIN1252_TO_UTF8;
189
190
    /**
191
     * @var string[]|null
192
     *
193
     * @phpstan-var array<int ,string>|null
194
     */
195
    private static $INTL_TRANSLITERATOR_LIST;
196
197
    /**
198
     * @var string[]|null
199
     *
200
     * @phpstan-var array<string>|null
201
     */
202
    private static $ENCODINGS;
203
204
    /**
205
     * @var int[]|null
206
     *
207
     * @phpstan-var array<string ,int>|null
208
     */
209
    private static $ORD;
210
211
    /**
212
     * @var string[]|null
213
     *
214
     * @phpstan-var array<string, string>|null
215
     */
216
    private static $EMOJI;
217
218
    /**
219
     * @var string[]|null
220
     *
221
     * @phpstan-var array<string>|null
222
     */
223
    private static $EMOJI_VALUES_CACHE;
224
225
    /**
226
     * @var string[]|null
227
     *
228
     * @phpstan-var array<string>|null
229
     */
230
    private static $EMOJI_KEYS_CACHE;
231
232
    /**
233
     * @var string[]|null
234
     *
235
     * @phpstan-var array<string>|null
236
     */
237
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
238
239
    /**
240
     * @var string[]|null
241
     *
242
     * @phpstan-var array<int, string>|null
243
     */
244
    private static $CHR;
245
246
    /**
     * Intentionally empty constructor.
     *
     * INFO: it takes no arguments and sets nothing up.
     */
    public function __construct()
    {
        // nothing to initialise
    }
252
253
    /**
     * Pick a single character out of a string: $str[1] like functionality, but multi-byte safe.
     *
     * EXAMPLE: <code>UTF8::access('fòô', 1); // 'ò'</code>
     *
     * @param string $str      <p>A UTF-8 string.</p>
     * @param int    $pos      <p>The zero-based position of the character to return.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Single multi-byte character, or an empty string for an empty input
     *                or a negative position.</p>
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        // nothing can be picked from an empty string or from a negative position
        if ($pos < 0 || $str === '') {
            return '';
        }

        // UTF-8 goes straight to mbstring, everything else through the class' own substr()
        return $encoding === 'UTF-8'
            ? (string) \mb_substr($str, $pos, 1)
            : (string) self::substr($str, $pos, 1, $encoding);
    }
279
280
    /**
     * Make sure the string starts with the UTF-8 BOM and return it.
     *
     * INFO: A string that already carries a BOM is returned unchanged.
     *
     * EXAMPLE: <code>UTF8::add_bom_to_string('fòô'); // "\xEF\xBB\xBF" . 'fòô'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The output string that contains BOM.</p>
     */
    public static function add_bom_to_string(string $str): string
    {
        if (self::string_has_bom($str)) {
            return $str;
        }

        return self::bom() . $str;
    }
302
303
    /**
     * Changes the case of all keys in an array (multi-byte safe).
     *
     * INFO: the values are copied over untouched; if two keys become equal after the
     * case change, the later entry overwrites the earlier one.
     *
     * @param array<string, mixed> $array    <p>The array to work on</p>
     * @param int                  $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                                       or <strong>CASE_LOWER</strong> (default); any other
     *                                       value is treated as <strong>CASE_LOWER</strong></p>
     * @param string               $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return array<string, mixed>
     *                              <p>An array with its keys lower- or uppercased.</p>
     */
    public static function array_change_key_case(
        array $array,
        int $case = \CASE_LOWER,
        string $encoding = 'UTF-8'
    ): array {
        if (
            $case !== \CASE_LOWER
            &&
            $case !== \CASE_UPPER
        ) {
            $case = \CASE_LOWER;
        }

        $return = [];
        // iterate by value: the elements are only read, so a reference is not needed
        // (and would otherwise stay dangling after the loop)
        foreach ($array as $key => $value) {
            $key = $case === \CASE_LOWER
                ? self::strtolower((string) $key, $encoding)
                : self::strtoupper((string) $key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
340
341
    /**
     * Extract the text that sits between two delimiters.
     *
     * The first occurrence of $start (searched from $offset) opens the match and the first
     * occurrence of $end behind it closes the match. An empty string is returned if one of
     * the delimiters is not found or if there is nothing between them.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding === 'UTF-8') {
            // fast path: plain mbstring calls
            $opening_at = \mb_strpos($str, $start, $offset);
            if ($opening_at === false) {
                return '';
            }

            $content_from = $opening_at + (int) \mb_strlen($start);
            $closing_at = \mb_strpos($str, $end, $content_from);

            if ($closing_at !== false && $closing_at !== $content_from) {
                return (string) \mb_substr($str, $content_from, $closing_at - $content_from);
            }

            return '';
        }

        // every other charset goes through the encoding aware helpers of this class
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $opening_at = self::strpos($str, $start, $offset, $encoding);
        if ($opening_at === false) {
            return '';
        }

        $content_from = $opening_at + (int) self::strlen($start, $encoding);
        $closing_at = self::strpos($str, $end, $content_from, $encoding);

        if ($closing_at !== false && $closing_at !== $content_from) {
            $content_length = $closing_at - $content_from;

            return (string) self::substr($str, $content_from, $content_length, $encoding);
        }

        return '';
    }
406
407
    /**
     * Convert binary into a string.
     *
     * INFO: opposite to UTF8::str_to_binary()
     *
     * EXAMPLE: <code>UTF8::binary_to_str('11110000100111111001100010000011'); // '😃'</code>
     *
     * NOTE(review): the conversion goes through base_convert(), which the PHP manual documents
     * as losing precision on large numbers - very long bit strings should be double-checked.
     *
     * @param string $bin <p>A string of "1" and "0" characters.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The packed bytes, or an empty string for an empty input or a value of zero.</p>
     */
    public static function binary_to_str($bin): string
    {
        if (!isset($bin[0])) {
            return '';
        }

        $convert = \base_convert($bin, 2, 16);
        if ($convert === '0') {
            return '';
        }

        // base_convert() drops leading zeros, so the hex string can have an odd number of
        // digits (e.g. "1010" => "a"); pack('H*') reads the high nibble first and would turn
        // that into "\xa0" instead of "\x0a", so the missing zero is put back in front
        if ((\strlen($convert) & 1) === 1) {
            $convert = '0' . $convert;
        }

        return \pack('H*', $convert);
    }
433
434
    /**
     * Get the UTF-8 Byte Order Mark as a three byte string.
     *
     * INFO: the private UTF8::$BOM table additionally lists e.g. the UTF-16 and UTF-32 BOM values
     *
     * EXAMPLE: <code>UTF8::bom(); // "\xEF\xBB\xBF"</code>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>UTF-8 Byte Order Mark.</p>
     */
    public static function bom(): string
    {
        return "\xef\xbb\xbf";
    }
450
451
    /**
     * Thin alias that forwards both arguments, unchanged, to UTF8::chr_map().
     *
     * @alias of UTF8::chr_map()
     *
     * @param callable $callback <p>Handed over to UTF8::chr_map().</p>
     * @param string   $str      <p>Handed over to UTF8::chr_map().</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *
     * @see   UTF8::chr_map()
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
467
468
    /**
     * Get the single character found at $index (the first character has index 0).
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The character at $index.</p>
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        if ($encoding !== 'UTF-8') {
            // other charsets are handled by the encoding aware substr() of this class
            return (string) self::substr($str, $index, 1, $encoding);
        }

        return (string) \mb_substr($str, $index, 1);
    }
488
489
    /**
     * Break a string up into its characters, via UTF8::str_split() with its default arguments.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of chars.</p>
     */
    public static function chars(string $str): array
    {
        /** @var string[] $characters */
        $characters = self::str_split($str);

        return $characters;
    }
504
505
    /**
     * Probe the server environment for UTF-8 related features and remember the
     * result in self::$SUPPORT.
     *
     * The probing is done only once: the first call fills the table and returns true,
     * every later call leaves the table alone and returns null.
     *
     * @return true|null
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            // already probed
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();

        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            // switch mbstring over to UTF-8 and keep a note about it
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            // the polyfill provides "mb_internal_encoding()" as well
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
561
562
    /**
     * Generates a UTF-8 encoded character from the given code point.
     *
     * INFO: opposite to UTF8::ord()
     *
     * EXAMPLE: <code>UTF8::chr(0x2603); // '☃'</code>
     *
     * The result is cached per code point and encoding. ASCII code points are read from the
     * lazily loaded "chr" data table, everything else is built via "IntlChar" if it is
     * available, or by hand otherwise.
     *
     * @param int    $code_point <p>The code point for which to generate a character.</p>
     * @param string $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>Multi-byte character, returns null on failure or empty input
     *                     (non-integer input, a code point &lt;= 0 or a code point above U+10FFFF).</p>
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // init
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        /** @noinspection InArrayCanBeUsedInspection */
        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // the parameter is not type-hinted, so the type is checked at runtime; code points
        // beyond U+10FFFF are not part of Unicode and cannot be encoded as UTF-8
        if (!\is_int($code_point) || $code_point <= 0 || $code_point > 0x10FFFF) {
            return null;
        }

        $cache_key = $code_point . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        if ($code_point < 0x80) {
            //
            // only for "simple"-chars: the table holds one byte per index, which is valid
            // UTF-8 for ASCII only (U+0080 already needs two bytes)
            //

            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];
        } elseif (self::$SUPPORT['intlChar'] === true) {
            //
            // fallback via "IntlChar"
            //

            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);
        } else {
            //
            // fallback via vanilla php
            //

            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            if ($code_point <= 0x7FF) {
                // two bytes: 110xxxxx 10xxxxxx
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            } elseif ($code_point <= 0xFFFF) {
                // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                       self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            } else {
                // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                       self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                       self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            }
        }

        // the character was built as UTF-8, convert it if another charset was requested
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cache_key] = $chr;
    }
687
688
    /**
689
     * Applies callback to all characters of a string.
690
     *
691
     * EXAMPLE: <code>UTF8::chr_map([UTF8::class, 'strtolower'], 'Κόσμε'); // ['κ','ό', 'σ', 'μ', 'ε']</code>
692
     *
693
     * @param callable $callback <p>The callback function.</p>
694
     * @param string   $str      <p>UTF-8 string to run callback on.</p>
695
     *
696
     * @psalm-pure
697
     *
698
     * @return string[]
699
     *                  <p>The outcome of the callback, as array.</p>
700
     */
701 2
    public static function chr_map($callback, string $str): array
    {
        // Break the string into its individual characters first ...
        $chars = self::str_split($str);

        // ... then hand every character to the callback and collect the results.
        return \array_map($callback, $chars);
    }
708
709
    /**
710
     * Generates an array of byte length of each character of a Unicode string.
711
     *
712
     * 1 byte => U+0000  - U+007F
713
     * 2 byte => U+0080  - U+07FF
714
     * 3 byte => U+0800  - U+FFFF
715
     * 4 byte => U+10000 - U+10FFFF
716
     *
717
     * EXAMPLE: <code>UTF8::chr_size_list('中文空白-test'); // [3, 3, 3, 3, 1, 1, 1, 1, 1]</code>
718
     *
719
     * @param string $str <p>The original unicode string.</p>
720
     *
721
     * @psalm-pure
722
     *
723
     * @return int[]
724
     *               <p>An array of byte lengths of each character.</p>
725
     */
726 4
    public static function chr_size_list(string $str): array
    {
        // Nothing to measure.
        if ($str === '') {
            return [];
        }

        // Regular case: measure every character with the native "strlen".
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \array_map('\strlen', self::str_split($str));
        }

        // "mb_" is available if overload is used, so measure via "mb_strlen"
        // with an 8-bit charset instead.
        $byte_length = static function (string $data): int {
            return \mb_strlen($data, 'CP850'); // 8-BIT
        };

        return \array_map($byte_length, self::str_split($str));
    }
744
745
    /**
746
     * Get a decimal code representation of a specific character.
747
     *
748
     * INFO: opposite to UTF8::decimal_to_chr()
749
     *
750
     * EXAMPLE: <code>UTF8::chr_to_decimal('§'); // 0xa7</code>
751
     *
752
     * @param string $char <p>The input character.</p>
753
     *
754
     * @psalm-pure
755
     *
756
     * @return int
757
     */
758 5
    public static function chr_to_decimal(string $char): int
    {
        // An empty string carries no character. Without this guard the iconv
        // path would call unpack() on an empty buffer (which yields false, so
        // the "[1]" access produces null for an int return type) and the
        // fallback would read the non-existing offset $char[0].
        if ($char === '') {
            return 0;
        }

        // Preferred path: let iconv convert to UCS-4 (little endian) and read
        // the first 32-bit value, which is the code point of the first character.
        if (self::$SUPPORT['iconv'] === true) {
            $chr_tmp = \iconv('UTF-8', 'UCS-4LE', $char);
            if ($chr_tmp !== false) {
                /** @phpstan-ignore-next-line - "unpack": only false if the format string contains errors */
                return \unpack('V', $chr_tmp)[1];
            }
        }

        // Fallback: decode the UTF-8 sequence by hand, starting with the lead byte.
        $code = self::ord($char[0]);
        $bytes = 1;

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        // The lead byte tells how many bytes belong to the sequence; strip its
        // length marker bits so that only payload bits remain in $code.
        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        // Append the low bits of every continuation byte.
        for ($i = 2; $i <= $bytes; ++$i) {
            // 10xxxxxx
            $code = ($code << 6) + (self::ord($char[$i - 1]) & ~0x80);
        }

        return $code;
    }
797
798
    /**
799
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
800
     *
801
     * EXAMPLE: <code>UTF8::chr_to_hex('§'); // U+00a7</code>
802
     *
803
     * @param int|string $char   <p>The input character</p>
804
     * @param string     $prefix [optional]
805
     *
806
     * @psalm-pure
807
     *
808
     * @return string
809
     *                <p>The code point encoded as U+xxxx.</p>
810
     */
811 2
    public static function chr_to_hex($char, string $prefix = 'U+'): string
    {
        // No character, no code point.
        if ($char === '') {
            return '';
        }

        // The HTML entity "&#0;" is passed on as an empty character.
        $input = $char === '&#0;' ? '' : $char;

        $code_point = self::ord((string) $input);

        return self::int_to_hex($code_point, $prefix);
    }
823
824
    /**
825
     * alias for "UTF8::chr_to_decimal()"
826
     *
827
     * @param string $chr
828
     *
829
     * @psalm-pure
830
     *
831
     * @return int
832
     *
833
     * @see        UTF8::chr_to_decimal()
834
     * @deprecated <p>please use "UTF8::chr_to_decimal()"</p>
835
     */
836 2
    public static function chr_to_int(string $chr): int
    {
        // Deprecated alias: simply forwards to "chr_to_decimal()".
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
840
841
    /**
842
     * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
843
     *
844
     * EXAMPLE: <code>UTF8::chunk_split('ABC-ÖÄÜ-中文空白-κόσμε', 3); // "ABC\r\n-ÖÄ\r\nÜ-中\r\n文空白\r\n-κό\r\nσμε"</code>
845
     *
846
     * @param string $body         <p>The original string to be split.</p>
847
     * @param int    $chunk_length [optional] <p>The maximum character length of a chunk.</p>
848
     * @param string $end          [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
849
     *
850
     * @psalm-pure
851
     *
852
     * @return string
853
     *                <p>The chunked string.</p>
854
     */
855 4
    public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
    {
        // Cut the text into pieces of at most $chunk_length characters ...
        $chunks = self::str_split($body, $chunk_length);

        // ... and glue them together with the line ending in between.
        return \implode($end, $chunks);
    }
859
860
    /**
861
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
862
     *
863
     * EXAMPLE: <code>UTF8::clean("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
864
     *
865
     * @param string $str                                     <p>The string to be sanitized.</p>
866
     * @param bool   $remove_bom                              [optional] <p>Set to true, if you need to remove
867
     *                                                        UTF-BOM.</p>
868
     * @param bool   $normalize_whitespace                    [optional] <p>Set to true, if you need to normalize the
869
     *                                                        whitespace.</p>
870
     * @param bool   $normalize_msword                        [optional] <p>Set to true, if you need to normalize MS
871
     *                                                        Word chars e.g.: "…"
872
     *                                                        => "..."</p>
873
     * @param bool   $keep_non_breaking_space                 [optional] <p>Set to true, to keep non-breaking-spaces,
874
     *                                                        in
875
     *                                                        combination with
876
     *                                                        $normalize_whitespace</p>
877
     * @param bool   $replace_diamond_question_mark           [optional] <p>Set to true, if you need to remove diamond
878
     *                                                        question mark e.g.: "�"</p>
879
     * @param bool   $remove_invisible_characters             [optional] <p>Set to false, if you do not want to remove
880
     *                                                        invisible characters e.g.: "\0"</p>
881
     * @param bool   $remove_invisible_characters_url_encoded [optional] <p>Set to true, if you want to remove
882
     *                                                        invisible url encoded characters e.g.: "%0B"<br> WARNING:
883
     *                                                        maybe contains false-positives e.g. aa%0Baa -> aaaa.
884
     *                                                        </p>
885
     *
886
     * @psalm-pure
887
     *
888
     * @return string
889
     *                <p>A clean UTF-8 encoded string.</p>
890
     *
891
     * @noinspection PhpTooManyParametersInspection
892
     */
893 90
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true,
        bool $remove_invisible_characters_url_encoded = false
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings
        //
        // Group 1 captures runs of well-formed sequences, the other two groups
        // match stray bytes; replacing every match with "$1" keeps only group 1.
        $pattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        /** @noinspection NotOptimalRegularExpressionsInspection */
        $cleaned = (string) \preg_replace($pattern, '$1', $str);

        // The optional steps below run in a fixed order.

        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned);
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned, $remove_invisible_characters_url_encoded);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        if ($remove_bom) {
            $cleaned = self::remove_bom($cleaned);
        }

        return $cleaned;
    }
942
943
    /**
944
     * Clean-up a string and show only printable UTF-8 chars at the end  + fix UTF-8 encoding.
945
     *
946
     * EXAMPLE: <code>UTF8::cleanup("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
947
     *
948
     * @param string $str <p>The input string.</p>
949
     *
950
     * @psalm-pure
951
     *
952
     * @return string
953
     */
954 33
    public static function cleanup($str): string
    {
        // Accept anything that can be cast to a string.
        $input = (string) $str;
        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $fixed = self::fix_simple_utf8($input);

        // Delegate to "clean()" with:
        // - $remove_bom                    = true
        // - $normalize_whitespace          = true
        // - $normalize_msword              = false
        // - $keep_non_breaking_space       = true
        // - $replace_diamond_question_mark = true
        // (invisible characters, e.g. "\0", are removed by clean()'s default)
        return self::clean($fixed, true, true, false, true, true);
    }
980
981
    /**
982
     * Accepts a string or a array of strings and returns an array of Unicode code points.
983
     *
984
     * INFO: opposite to UTF8::string()
985
     *
986
     * EXAMPLE: <code>
987
     * UTF8::codepoints('κöñ'); // array(954, 246, 241)
988
     * // ... OR ...
989
     * UTF8::codepoints('κöñ', true); // array('U+03ba', 'U+00f6', 'U+00f1')
990
     * </code>
991
     *
992
     * @param string|string[] $arg         <p>A UTF-8 encoded string or an array of such strings.</p>
993
     * @param bool            $use_u_style <p>If True, will return code points in U+xxxx format,
994
     *                                     default, code points will be returned as integers.</p>
995
     *
996
     * @psalm-pure
997
     *
998
     * @return int[]|string[]
999
     *                        <p>
1000
     *                        The array of code points:<br>
1001
     *                        int[] for $use_u_style === false<br>
1002
     *                        string[] for $use_u_style === true<br>
1003
     *                        </p>
1004
     */
1005 12
    public static function codepoints($arg, bool $use_u_style = false): array
    {
        // A plain string is split into its characters, an array is used as given.
        $chars = \is_string($arg) ? self::str_split($arg) : $arg;

        /**
         * Anything that is not an array at this point, as well as an empty
         * list, yields no code points.
         *
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_array($chars) || $chars === []) {
            return [];
        }

        // character => integer code point
        $code_points = \array_map([self::class, 'ord'], $chars);
        if (!$use_u_style) {
            return $code_points;
        }

        // integer code point => "U+xxxx" notation
        return \array_map([self::class, 'int_to_hex'], $code_points);
    }
1042
1043
    /**
1044
     * Trims the string and replaces consecutive whitespace characters with a
1045
     * single space. This includes tabs and newline characters, as well as
1046
     * multibyte whitespace such as the thin space and ideographic space.
1047
     *
1048
     * @param string $str <p>The input string.</p>
1049
     *
1050
     * @psalm-pure
1051
     *
1052
     * @return string
1053
     *                <p>A string with trimmed $str and condensed whitespace.</p>
1054
     */
1055 13
    public static function collapse_whitespace(string $str): string
    {
        if (self::$SUPPORT['mbstring'] !== true) {
            // no mbstring: use the library's own regex helper
            $collapsed = self::regex_replace($str, '[[:space:]]+', ' ');
        } else {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $collapsed = (string) \mb_ereg_replace('[[:space:]]+', ' ', $str);
        }

        // every whitespace run is a single space now; drop the outer ones
        return \trim($collapsed);
    }
1064
1065
    /**
1066
     * Returns count of characters used in a string.
1067
     *
1068
     * EXAMPLE: <code>UTF8::count_chars('κaκbκc'); // array('κ' => 3, 'a' => 1, 'b' => 1, 'c' => 1)</code>
1069
     *
1070
     * @param string $str                     <p>The input string.</p>
1071
     * @param bool   $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
1072
     * @param bool   $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use "mb_" functions.</p>
1073
     *
1074
     * @psalm-pure
1075
     *
1076
     * @return int[]
1077
     *               <p>An associative array of Character as keys and
1078
     *               their count as values.</p>
1079
     */
1080 19
    public static function count_chars(
        string $str,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // One array element per character ...
        $chars = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

        // ... then count how often every distinct character occurs.
        return \array_count_values($chars);
    }
1094
1095
    /**
1096
     * Create a valid CSS identifier for e.g. "class"- or "id"-attributes.
1097
     *
1098
     * EXAMPLE: <code>UTF8::css_identifier('123foo/bar!!!'); // _23foo-bar</code>
1099
     *
1100
     * copy&paste from https://github.com/drupal/core/blob/8.8.x/lib/Drupal/Component/Utility/Html.php#L95
1101
     *
1102
     * @param string   $str        <p>INFO: if no identifier is given e.g. " " or "", we will create a unique string automatically</p>
1103
     * @param string[] $filter
1104
     * @param bool     $strip_tags
1105
     * @param bool     $strtolower
1106
     *
1107
     * @psalm-pure
1108
     *
1109
     * @return string
1110
     *
1111
     * @phpstan-param array<string,string> $filter
1112
     */
1113 1
    public static function css_identifier(
        string $str = '',
        array $filter = [
            ' ' => '-',
            '/' => '-',
            '[' => '',
            ']' => '',
        ],
        bool $strip_tags = false,
        bool $strtolower = true
    ): string {
        // Fallback: without a usable identifier a unique one is generated,
        // otherwise the given one is cleaned first.
        $str = \trim($str) === ''
            ? \uniqid('auto-generated-css-class', true)
            : self::clean($str);

        if ($strip_tags) {
            $str = \strip_tags($str);
        }

        if ($strtolower) {
            $str = \strtolower($str);
        }

        // We could also use strtr() here but its much slower than str_replace().
        // In order to keep '__' to stay '__' we first replace it with a different
        // placeholder after checking that it is not defined as a filter.
        $protected_underscores = 0;
        if (!isset($filter['__'])) {
            $str = \str_replace('__', '##', $str, $protected_underscores);
        }

        /* @noinspection ArrayValuesMissUseInspection */
        $search = \array_keys($filter);
        $replace = \array_values($filter);
        $str = \str_replace($search, $replace, $str);

        // Restore the placeholder '##' to '__', but only if the placeholder
        // was actually introduced above.
        if ($protected_underscores > 0) {
            $str = \str_replace('##', '__', $str);
        }

        // Characters kept by the expression below:
        // - the hyphen (U+002D)
        // - 0-9 (U+0030 - U+0039)
        // - A-Z (U+0041 - U+005A)
        // - the underscore (U+005F)
        // - a-z (U+0061 - U+007A)
        // - characters from U+00A1 up to U+FFFF
        // Every other character is stripped.
        $str = (string) \preg_replace('/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u', '', $str);

        // Identifiers cannot start with a digit, two hyphens, or a hyphen followed by a digit.
        $str = (string) \preg_replace(['/^[0-9]/', '/^(-[0-9])|^(--)/'], ['_', '__'], $str);

        return \trim($str, '-');
    }
1170
1171
    /**
1172
     * Remove css media-queries.
1173
     *
1174
     * @param string $str
1175
     *
1176
     * @psalm-pure
1177
     *
1178
     * @return string
1179
     */
1180 1
    public static function css_stripe_media_queries(string $str): string
    {
        // Matches an "@media ..." rule including its nested block(s).
        $media_query_pattern = '#@media\\s+(?:only\\s)?(?:[\\s{(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $without_media_queries = \preg_replace($media_query_pattern, '', $str);

        // preg_replace() yields null on failure; the cast turns that into ''.
        return (string) $without_media_queries;
    }
1188
1189
    /**
1190
     * Checks whether ctype is available on the server.
1191
     *
1192
     * @psalm-pure
1193
     *
1194
     * @return bool
1195
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
1196
     *
1197
     * @internal <p>Please do not use it anymore, we will make it private in next major version.</p>
1198
     */
1199
    public static function ctype_loaded(): bool
    {
        // Ask PHP directly whether the "ctype" extension is loaded.
        $is_loaded = \extension_loaded('ctype');

        return $is_loaded;
    }
1203
1204
    /**
1205
     * Converts an int value into a UTF-8 character.
1206
     *
1207
     * INFO: opposite to UTF8::chr_to_decimal()
1208
     *
1209
     * EXAMPLE: <code>UTF8::decimal_to_chr(931); // 'Σ'</code>
1210
     *
1211
     * @param int|string $int
1212
     *
1213
     * @phpstan-param int|numeric-string $int
1214
     *
1215
     * @psalm-pure
1216
     *
1217
     * @return string
1218
     */
1219 20
    public static function decimal_to_chr($int): string
    {
        // Build the numeric HTML entity (e.g. "&#931;") and let the entity
        // decoder turn it into the character.
        $entity = '&#' . $int . ';';

        return self::html_entity_decode($entity, \ENT_QUOTES | \ENT_HTML5);
    }
1223
1224
    /**
1225
     * Decodes a MIME header field
1226
     *
1227
     * @param string $str
1228
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1229
     *
1230
     * @psalm-pure
1231
     *
1232
     * @return false|string
1233
     *                      <p>A decoded MIME field on success,
1234
     *                      or false if an error occurs during the decoding.</p>
1235
     */
1236 2
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        // The two names below are used as given, everything else is normalized first.
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // always fallback via symfony polyfill
        return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
    }
1245
1246
    /**
1247
     * Convert any two-letter country code (ISO 3166-1) to the corresponding Emoji.
1248
     *
1249
     * @see https://en.wikipedia.org/wiki/ISO_3166-1
1250
     *
1251
     * @param string $country_code_iso_3166_1 <p>e.g. DE</p>
1252
     *
1253
     * @return string
1254
     *                <p>Emoji or empty string on error.</p>
1255
     */
1256 1
    public static function emoji_from_country_code(string $country_code_iso_3166_1): string
    {
        // Only two ASCII letters can be mapped to a flag. This also rejects the
        // empty string, wrong lengths, digits / punctuation and multibyte
        // characters: the latter would be cut apart by the byte-wise access
        // below and end up as meaningless code points instead of the
        // documented empty string.
        if (\preg_match('/\A[A-Za-z]{2}\z/', $country_code_iso_3166_1) !== 1) {
            return '';
        }

        $country_code_iso_3166_1 = \strtoupper($country_code_iso_3166_1);

        // Each letter A-Z is shifted by its distance from "A" (0x41) onto the
        // code point range that starts at 0x1F1E6.
        $flagOffset = 0x1F1E6;
        $asciiOffset = 0x41;

        return (self::chr((self::ord($country_code_iso_3166_1[0]) - $asciiOffset + $flagOffset)) ?? '') .
               (self::chr((self::ord($country_code_iso_3166_1[1]) - $asciiOffset + $flagOffset)) ?? '');
    }
1274
1275
    /**
1276
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
1277
     *
1278
     * INFO: opposite to UTF8::emoji_encode()
1279
     *
1280
     * EXAMPLE: <code>
1281
     * UTF8::emoji_decode('foo CHARACTER_OGRE', false); // 'foo 👹'
1282
     * //
1283
     * UTF8::emoji_decode('foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_', true); // 'foo 👹'
1284
     * </code>
1285
     *
1286
     * @param string $str                            <p>The input string.</p>
1287
     * @param bool   $use_reversible_string_mappings [optional] <p>
1288
     *                                               When <b>TRUE</b>, we use a reversible string mapping
1289
     *                                               between "emoji_encode" and "emoji_decode".</p>
1290
     *
1291
     * @psalm-pure
1292
     *
1293
     * @return string
1294
     */
1295 9
    public static function emoji_decode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        // Load the emoji lookup tables on first use.
        if (self::$EMOJI_KEYS_CACHE === null) {
            /** @phpstan-ignore-next-line - we need to load the data first */
            self::initEmojiData();
        }

        // Pick the key list that matches the mapping used while encoding.
        $search = $use_reversible_string_mappings
            ? (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : (array) self::$EMOJI_KEYS_CACHE;

        // key => emoji
        return (string) \str_replace($search, (array) self::$EMOJI_VALUES_CACHE, $str);
    }
1318
1319
    /**
1320
     * Encode a string with emoji chars into a non-emoji string.
1321
     *
1322
     * INFO: opposite to UTF8::emoji_decode()
1323
     *
1324
     * EXAMPLE: <code>
1325
     * UTF8::emoji_encode('foo 👹', false)); // 'foo CHARACTER_OGRE'
1326
     * //
1327
     * UTF8::emoji_encode('foo 👹', true)); // 'foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_'
1328
     * </code>
1329
     *
1330
     * @param string $str                            <p>The input string</p>
1331
     * @param bool   $use_reversible_string_mappings [optional] <p>
1332
     *                                               when <b>TRUE</b>, we use a reversible string mapping
1333
     *                                               between "emoji_encode" and "emoji_decode"</p>
1334
     *
1335
     * @psalm-pure
1336
     *
1337
     * @return string
1338
     */
1339 12
    public static function emoji_encode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        // Load the emoji lookup tables on first use.
        if (self::$EMOJI_KEYS_CACHE === null) {
            /** @phpstan-ignore-next-line - we need to load the data first */
            self::initEmojiData();
        }

        // Pick the key list the emoji are replaced with.
        $replacement = $use_reversible_string_mappings
            ? (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : (array) self::$EMOJI_KEYS_CACHE;

        // emoji => key
        return (string) \str_replace((array) self::$EMOJI_VALUES_CACHE, $replacement, $str);
    }
1362
1363
    /**
1364
     * Encode a string with a new charset-encoding.
1365
     *
1366
     * INFO:  This function will also try to fix broken / double encoding,
1367
     *        so you can call this function also on a UTF-8 string and you don't mess up the string.
1368
     *
1369
     * EXAMPLE: <code>
1370
     * UTF8::encode('ISO-8859-1', '-ABC-中文空白-'); // '-ABC-????-'
1371
     * //
1372
     * UTF8::encode('UTF-8', '-ABC-中文空白-'); // '-ABC-中文空白-'
1373
     * //
1374
     * UTF8::encode('HTML', '-ABC-中文空白-'); // '-ABC-&#20013;&#25991;&#31354;&#30333;-'
1375
     * //
1376
     * UTF8::encode('BASE64', '-ABC-中文空白-'); // 'LUFCQy3kuK3mlofnqbrnmb0t'
1377
     * </code>
1378
     *
1379
     * @param string $to_encoding                   <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
1380
     * @param string $str                           <p>The input string</p>
1381
     * @param bool   $auto_detect_the_from_encoding [optional] <p>Force the new encoding (we try to fix broken / double
1382
     *                                              encoding for UTF-8)<br> otherwise we auto-detect the current
1383
     *                                              string-encoding</p>
1384
     * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
1385
     *                                              A empty string will trigger the autodetect anyway.</p>
1386
     *
1387
     * @psalm-pure
1388
     *
1389
     * @return string
1390
     *
1391
     * @psalm-suppress InvalidReturnStatement
1392
     */
1393 29
    public static function encode(
        string $to_encoding,
        string $str,
        bool $auto_detect_the_from_encoding = true,
        string $from_encoding = ''
    ): string {
        // Nothing to convert, or no target given.
        if ($str === '' || $to_encoding === '') {
            return $str;
        }

        // Normalize the encoding names; 'UTF-8' and 'CP850' are used as given.
        if ($to_encoding !== 'UTF-8' && $to_encoding !== 'CP850') {
            $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
        }

        if ($from_encoding && $from_encoding !== 'UTF-8' && $from_encoding !== 'CP850') {
            $from_encoding = self::normalize_encoding($from_encoding);
        }

        // Source and target are explicitly the same.
        if ($to_encoding && $from_encoding && $from_encoding === $to_encoding) {
            return $str;
        }

        //
        // pseudo encodings: JSON, BASE64, HTML-ENTITIES
        //
        // As a target they return right away; as a source the input is decoded
        // and the source encoding is reset to '' afterwards.
        //

        if ($to_encoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }

        if ($from_encoding === 'JSON') {
            $str = self::json_decode($str);
            $from_encoding = '';
        }

        if ($to_encoding === 'BASE64') {
            return \base64_encode($str);
        }

        if ($from_encoding === 'BASE64') {
            $str = \base64_decode($str, true);
            $from_encoding = '';
        }

        if ($to_encoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true);
        }

        if ($from_encoding === 'HTML-ENTITIES') {
            $str = self::html_entity_decode($str, \ENT_COMPAT);
            $from_encoding = '';
        }

        //
        // find out the source encoding
        //

        // Detection runs when requested or when no source encoding is left.
        $detected_encoding = ($auto_detect_the_from_encoding || !$from_encoding)
            ? self::str_detect_encoding($str)
            : false;

        if ($detected_encoding !== false) {
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $from_encoding = $detected_encoding;
        } elseif ($auto_detect_the_from_encoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (!$from_encoding || $from_encoding === $to_encoding) {
            return $str;
        }

        //
        // conversions handled by the library's own helpers
        //

        if (
            $to_encoding === 'UTF-8'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'ISO-8859-1'], true)
        ) {
            return self::to_utf8($str);
        }

        if (
            $to_encoding === 'ISO-8859-1'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'UTF-8'], true)
        ) {
            return self::to_iso8859($str);
        }

        //
        // generic conversion via "mbstring", then "iconv"
        //

        $mbstring_support = self::$SUPPORT['mbstring'];

        if (
            !\in_array($to_encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            $mbstring_support === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
        }

        if ($mbstring_support === true) {
            $converted = \mb_convert_encoding($str, $to_encoding, $from_encoding);

            // A falsy result falls through to the iconv attempt below.
            if ($converted) {
                \assert(\is_string($converted));

                return $converted;
            }
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection - Detected an incomplete multibyte character in input string */
        $converted = @\iconv($from_encoding, $to_encoding, $str);

        // If iconv failed as well, the input is handed back unchanged.
        return $converted !== false ? $converted : $str;
    }
1540
1541
    /**
     * Encodes a string as a MIME header value by delegating to "iconv_mime_encode()".
     *
     * Charset names other than "UTF-8" and "CP850" are first run through
     * self::normalize_encoding($charset, 'UTF-8'). The header field name handed
     * to iconv is always the empty string.
     *
     * @param string $str               <p>The text to encode.</p>
     * @param string $from_charset      [optional] <p>Charset of the input (iconv "input-charset").</p>
     * @param string $to_charset        [optional] <p>Charset of the output (iconv "output-charset").</p>
     * @param string $transfer_encoding [optional] <p>Transfer encoding (iconv "scheme").</p>
     * @param string $linefeed          [optional] <p>Line break sequence (iconv "line-break-chars").</p>
     * @param int    $indent            [optional] <p>Maximum line length (iconv "line-length").</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The encoded MIME field on success,
     *                      or false if an error occurs during the encoding.</p>
     */
    public static function encode_mimeheader(
        string $str,
        string $from_charset = 'UTF-8',
        string $to_charset = 'UTF-8',
        string $transfer_encoding = 'Q',
        string $linefeed = "\r\n",
        int $indent = 76
    ) {
        // these two names are used verbatim, everything else gets normalized
        $already_canonical = ['UTF-8', 'CP850'];

        if (!\in_array($from_charset, $already_canonical, true)) {
            $from_charset = self::normalize_encoding($from_charset, 'UTF-8');
        }

        if (!\in_array($to_charset, $already_canonical, true)) {
            $to_charset = self::normalize_encoding($to_charset, 'UTF-8');
        }

        $iconv_options = [
            'scheme'           => $transfer_encoding,
            'line-length'      => $indent,
            'input-charset'    => $from_charset,
            'output-charset'   => $to_charset,
            'line-break-chars' => $linefeed,
        ];

        // always fallback via symfony polyfill
        return \iconv_mime_encode('', $str, $iconv_options);
    }
1584
1585
    /**
     * Creates an excerpt of a text; when a search string is given, the excerpt is
     * cut so that the first (case-insensitive) match sits roughly in its middle.
     *
     * Cuts are only made at a space or a dot. Removed text is marked with
     * $replacer_for_skipped_text and the cut edges are trimmed of whitespace
     * and common punctuation.
     *
     * @param string   $str                       <p>The input string.</p>
     * @param string   $search                    <p>The searched string.</p>
     * @param int|null $length                    [optional] <p>Default: null === half of the input length</p>
     * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
     * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacer_for_skipped_text = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // UTF-8 goes straight to the "mb_*" functions, everything else through the class wrappers
        $use_mb = $encoding === 'UTF-8';
        $edge_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        // character count of a text
        $count_chars = static function (string $text) use ($use_mb, $encoding): int {
            return (int) ($use_mb ? \mb_strlen($text) : self::strlen($text, $encoding));
        };

        // Smaller of the positions of ' ' and '.' at or after $offset.
        // NOTE: a "not found" result (false) compares as the smallest value in \min(),
        // so the result is 0 as soon as one of the two characters is missing.
        $next_break = static function (int $offset) use ($str, $use_mb, $encoding): int {
            if ($use_mb) {
                return (int) \min(
                    \mb_strpos($str, ' ', $offset),
                    \mb_strpos($str, '.', $offset)
                );
            }

            return (int) \min(
                self::strpos($str, ' ', $offset, $encoding),
                self::strpos($str, '.', $offset, $encoding)
            );
        };

        // part of the input text (string, or false if the wrapper fails)
        $slice = static function (int $start, int $size) use ($str, $use_mb, $encoding) {
            return $use_mb
                ? \mb_substr($str, $start, $size)
                : self::substr($str, $start, $size, $encoding);
        };

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2);
        }

        // -- no search string: just shorten the text from the start --

        if ($search === '') {
            $end = $length > 0 ? \min($length - 1, $count_chars($str)) : 0;

            $pos = $next_break($end);
            if (!$pos) {
                return $str;
            }

            $str_sub = $slice(0, $pos);
            if ($str_sub === false) {
                return '';
            }

            return \rtrim($str_sub, $edge_chars) . $replacer_for_skipped_text;
        }

        // -- search string given: try to center the match --

        $word_position = (int) (
            $use_mb
                ? \mb_stripos($str, $search)
                : self::stripos($str, $search, 0, $encoding)
        );
        $half_side = (int) ($word_position - $length / 2 + $count_chars($search) / 2);

        // start the excerpt at the last ' ' or '.' in front of the centered window
        $pos_start = 0;
        if ($half_side > 0) {
            $half_text = $slice(0, $half_side);
            if ($half_text !== false) {
                if ($use_mb) {
                    $pos_start = (int) \max(
                        \mb_strrpos($half_text, ' '),
                        \mb_strrpos($half_text, '.')
                    );
                } else {
                    $pos_start = (int) \max(
                        self::strrpos($half_text, ' ', 0, $encoding),
                        self::strrpos($half_text, '.', 0, $encoding)
                    );
                }
            }
        }

        $skip_leading_text = $word_position && $half_side > 0;

        // never search for the closing break beyond the end of the text
        $offset = \min(
            ($skip_leading_text ? $pos_start : 0) + $length - 1,
            (int) self::strlen($str, $encoding)
        );
        $break = $next_break($offset);

        if ($skip_leading_text) {
            $pos_end = $break - $pos_start;

            if ($pos_end <= 0) {
                // no usable break behind the window: keep everything up to the end
                $str_sub = $slice($pos_start, $count_chars($str));

                return $str_sub !== false
                    ? $replacer_for_skipped_text . \ltrim($str_sub, $edge_chars)
                    : '';
            }

            $str_sub = $slice($pos_start, $pos_end);

            return $str_sub !== false
                ? $replacer_for_skipped_text . \trim($str_sub, $edge_chars) . $replacer_for_skipped_text
                : '';
        }

        // the match is close to the beginning: cut only the tail
        if (!$break) {
            return $str;
        }

        $str_sub = $slice(0, $break);

        return $str_sub !== false
            ? \rtrim($str_sub, $edge_chars) . $replacer_for_skipped_text
            : '';
    }
1774
1775
    /**
     * Reads entire file into a string.
     *
     * EXAMPLE: <code>UTF8::file_get_contents('utf16le.txt'); // ...</code>
     *
     * WARNING: Do not use UTF-8 Option ($convert_to_utf8) for binary files (e.g.: images) !!!
     *
     * NOTE: The filename is sanitized before use (NUL bytes and "<...>" tags are removed,
     * single and double quotes are HTML-encoded), so the file that is opened can differ
     * from the given name if it contains such characters.
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Whether the include path is searched for the file.
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with
     *                                        stream_context_create. If null and $timeout is non-zero,
     *                                        a context with the "http" timeout option is created.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts (null === 0).
     *                                        </p>
     * @param int|null      $max_length       [optional] <p>
     *                                        Maximum length of data read. The default is to read until end
     *                                        of file is reached.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout; only used when
     *                                        no $context is given.</p>
     * @param bool          $convert_to_utf8  <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Binary files
     *                                        like images or pdf will not be converted.</p>
     * @param string        $from_encoding    [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        A empty string will trigger the autodetect anyway.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The function returns the read data as string or <b>false</b> on failure.</p>
     *
     * @noinspection PhpTooManyParametersInspection
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $max_length = null,
        int $timeout = 10,
        bool $convert_to_utf8 = true,
        string $from_encoding = ''
    ) {
        // Sanitize the filename like "FILTER_SANITIZE_STRING" did, but without that
        // filter: it is deprecated as of PHP 8.1 and raised E_DEPRECATED on every call.
        // This is the commonly used replacement (strip NUL bytes + tags, encode quotes);
        // it can differ from the old filter for exotic input such as a lone "<".
        $filename = \preg_replace('/\x00|<[^>]*>?/', '', $filename);
        if ($filename === null) {
            // PCRE failure
            return false;
        }
        $filename = \str_replace(["'", '"'], ['&#39;', '&#34;'], $filename);

        // apply the timeout only if the caller did not bring an own stream context
        if ($timeout && $context === null) {
            $context = \stream_context_create(
                [
                    'http' => [
                        'timeout' => $timeout,
                    ],
                ]
            );
        }

        if ($offset === null) {
            $offset = 0;
        }

        // the length argument is only passed on when it was really given
        if (\is_int($max_length)) {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset, $max_length);
        } else {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset);
        }

        // return false on error
        if ($data === false) {
            return false;
        }

        if ($convert_to_utf8) {
            // binary content is left untouched, unless it is UTF-16 / UTF-32 text
            if (
                !self::is_binary($data, true)
                ||
                self::is_utf16($data, false) !== false
                ||
                self::is_utf32($data, false) !== false
            ) {
                $data = self::encode('UTF-8', $data, false, $from_encoding);
                $data = self::cleanup($data);
            }
        }

        return $data;
    }
1877
1878
    /**
     * Checks if the content of a file starts with a BOM (Byte Order Mark).
     *
     * The whole file is read and then handed to self::string_has_bom().
     *
     * EXAMPLE: <code>UTF8::file_has_bom('utf8_with_bom.txt'); // true</code>
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p>
     *
     * @psalm-pure
     */
    public static function file_has_bom(string $file_path): bool
    {
        $raw = \file_get_contents($file_path);

        if ($raw !== false) {
            return self::string_has_bom($raw);
        }

        // the file could not be read
        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1901
1902
    /**
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Arrays and objects are processed recursively, element by element; strings get
     * their line endings normalized (if they contain "\r") and, when not pure ASCII,
     * are normalized to the requested form. Every other type is returned untouched.
     *
     * EXAMPLE: <code>UTF8::filter(array("\xE9", 'à', 'a')); // array('é', 'à', 'a')</code>
     *
     * @param array|object|string $var
     * @param int                 $normalization_form <p>One of the "\Normalizer" form constants.</p>
     * @param string              $leading_combining  <p>Prefix for strings that start with a combining mark.</p>
     *
     * @psalm-pure
     *
     * @return mixed
     *
     * @template TFilter
     * @phpstan-param TFilter $var
     * @phpstan-return TFilter
     */
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        $type = \gettype($var);

        if ($type === 'object' || $type === 'array') {
            // filter every element / property in place
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            unset($item);
        } elseif ($type === 'string') {
            if (\strpos($var, "\r") !== false) {
                $var = self::normalize_line_ending($var);
            }

            if (!ASCII::is_ascii($var)) {
                // '-' is only a truthy marker for "the input was already normalized"
                $normalized = '-';

                if (!\Normalizer::isNormalized($var, $normalization_form)) {
                    $normalized = \Normalizer::normalize($var, $normalization_form);

                    // if the normalizer gave no usable result, re-encode the string as UTF-8
                    $var = ($normalized && isset($normalized[0]))
                        ? $normalized
                        : self::encode('UTF-8', $var);
                }

                \assert(\is_string($var));

                $starts_with_combining_mark = $normalized
                    && $var[0] >= "\x80"
                    && isset($normalized[0], $leading_combining[0])
                    && \preg_match('/^\\p{Mn}/u', $var);

                if ($starts_with_combining_mark) {
                    // Prevent leading combining chars
                    // for NFC-safe concatenations.
                    $var = $leading_combining . $var;
                }
            }
        }

        /** @noinspection PhpSillyAssignmentInspection */
        /** @phpstan-var TFilter $var */
        $var = $var;

        return $var;
    }
1979
1980
    /**
     * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets a specific external variable by name, optionally filters it and
     * passes the result through self::filter().
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input(INPUT_GET, 'foo', FILTER_SANITIZE_STRING); // 'bar'
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input.php
     *
     * @param int            $type          <p>
     *                                      One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                                      <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                                      <b>INPUT_ENV</b>.
     *                                      </p>
     * @param string         $variable_name <p>
     *                                      Name of a variable to get.
     *                                      </p>
     * @param int            $filter        [optional] <p>
     *                                      The ID of the filter to apply. The
     *                                      manual page lists the available filters.
     *                                      </p>
     * @param int|int[]|null $options       [optional] <p>
     *                                      Associative array of options or bitwise disjunction of flags. If filter
     *                                      accepts options, flags can be provided in "flags" field of array.
     *                                      A null value is not passed on to "filter_input()".
     *                                      </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     *               </p>
     */
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $without_options = $options === null || \func_num_args() < 4;

        $raw = $without_options
            ? \filter_input($type, $variable_name, $filter)
            : \filter_input($type, $variable_name, $filter, $options);

        return self::filter($raw);
    }
2035
2036
    /**
     * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets external variables, optionally filters them and passes the
     * result through self::filter().
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input_array(INPUT_GET, array('foo' => FILTER_SANITIZE_STRING)); // array('foo' => 'bar')
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input-array.php
     *
     * @param int        $type       <p>
     *                               One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                               <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                               <b>INPUT_ENV</b>.
     *                               </p>
     * @param array|null $definition [optional] <p>
     *                               An array defining the arguments. A valid key is a string
     *                               containing a variable name and a valid value is either a filter type, or an array
     *                               optionally specifying the filter, flags and options. If the value is an
     *                               array, valid keys are "filter" which specifies the filter type,
     *                               "flags" which specifies any flags that apply to the
     *                               filter, and "options" which specifies any options that
     *                               apply to the filter.
     *                               </p>
     *                               <p>
     *                               This parameter can be also an integer holding a filter constant. Then all values in the
     *                               input array are filtered by this filter.
     *                               </p>
     *                               <p>
     *                               If null, "filter_input_array()" is called with the type only.
     *                               </p>
     * @param bool       $add_empty  [optional] <p>
     *                               Add missing keys as <b>NULL</b> to the return value.
     *                               </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
     *               is not set and <b>NULL</b> if the filter fails.
     *               </p>
     */
    public static function filter_input_array(
        int $type,
        $definition = null,
        bool $add_empty = true
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $without_definition = $definition === null || \func_num_args() < 2;

        $values = $without_definition
            ? \filter_input_array($type)
            : \filter_input_array($type, $definition, $add_empty);

        return self::filter($values);
    }
2097
2098
    /**
     * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Filters a variable with a specified filter and passes the result through self::filter().
     *
     * EXAMPLE: <code>UTF8::filter_var('-ABC-中文空白-', FILTER_VALIDATE_URL); // false</code>
     *
     * @see http://php.net/manual/en/function.filter-var.php
     *
     * @param float|int|string|null $variable <p>
     *                                        Value to filter.
     *                                        </p>
     * @param int                   $filter   [optional] <p>
     *                                        The ID of the filter to apply. The
     *                                        manual page lists the available filters.
     *                                        </p>
     * @param int|int[]|null        $options  [optional] <p>
     *                                        Associative array of options or bitwise disjunction of flags. If filter
     *                                        accepts options, flags can be provided in "flags" field of array. For
     *                                        the "callback" filter, callable type should be passed. The
     *                                        callback must accept one argument, the value to be filtered, and return
     *                                        the value after filtering/sanitizing it.
     *                                        A null value (given or omitted) is not passed on to "filter_var()".
     *                                        </p>
     *                                        <p>
     *                                        <code>
     *                                        // for filters that accept options, use this format
     *                                        $options = array(
     *                                            'options' => array(
     *                                                'default' => 3, // value to return if the filter fails
     *                                                // other options here
     *                                                'min_range' => 0
     *                                            ),
     *                                            'flags' => FILTER_FLAG_ALLOW_OCTAL,
     *                                        );
     *                                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
     *                                        // for filter that only accept flags, you can pass them directly
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
     *                                        // for filter that only accept flags, you can also pass as an array
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
     *                                            array('flags' => FILTER_NULL_ON_FAILURE));
     *                                        // callback validate filter
     *                                        function foo($value)
     *                                        {
     *                                            // Expected format: Surname, GivenNames
     *                                            if (strpos($value, ", ") === false) return false;
     *                                            list($surname, $givennames) = explode(", ", $value, 2);
     *                                            $empty = (empty($surname) || empty($givennames));
     *                                            $notstrings = (!is_string($surname) || !is_string($givennames));
     *                                            if ($empty || $notstrings) {
     *                                                return false;
     *                                            } else {
     *                                                return $value;
     *                                            }
     *                                        }
     *                                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
     *                                        </code>
     *                                        </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>The filtered data, or <b>FALSE</b> if the filter fails.</p>
     */
    public static function filter_var(
        $variable,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        if ($options === null || \func_num_args() < 3) {
            // The native "$options" parameter accepts only array|int, so an explicit
            // null must not be forwarded; fall back to the native default instead
            // (same guard as in self::filter_input()).
            $variable = \filter_var($variable, $filter);
        } else {
            $variable = \filter_var($variable, $filter, $options);
        }

        return self::filter($variable);
    }
2177
2178
    /**
2179
     * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
2180
     *
2181
     * Gets multiple variables and optionally filters them.
2182
     *
2183
     * EXAMPLE: <code>
2184
     * $filters = [
2185
     *     'name'  => ['filter'  => FILTER_CALLBACK, 'options' => [UTF8::class, 'ucwords']],
2186
     *     'age'   => ['filter'  => FILTER_VALIDATE_INT, 'options' => ['min_range' => 1, 'max_range' => 120]],
2187
     *     'email' => FILTER_VALIDATE_EMAIL,
2188
     * ];
2189
     *
2190
     * $data = [
2191
     *     'name' => 'κόσμε',
2192
     *     'age' => '18',
2193
     *     'email' => 'user@example.com'
2194
     * ];
2195
     *
2196
     * UTF8::filter_var_array($data, $filters, true); // ['name' => 'Κόσμε', 'age' => 18, 'email' => 'user@example.com']
2197
     * </code>
2198
     *
2199
     * @see http://php.net/manual/en/function.filter-var-array.php
2200
     *
2201
     * @param array<mixed>   $data       <p>
2202
     *                                   An array with string keys containing the data to filter.
2203
     *                                   </p>
2204
     * @param array|int|null $definition [optional] <p>
2205
     *                                   An array defining the arguments. A valid key is a string
2206
     *                                   containing a variable name and a valid value is either a
2207
     *                                   filter type, or an
2208
     *                                   array optionally specifying the filter, flags and options.
2209
     *                                   If the value is an array, valid keys are filter
2210
     *                                   which specifies the filter type,
2211
     *                                   flags which specifies any flags that apply to the
2212
     *                                   filter, and options which specifies any options that
2213
     *                                   apply to the filter. See the example below for a better understanding.
2214
     *                                   </p>
2215
     *                                   <p>
2216
     *                                   This parameter can be also an integer holding a filter constant. Then all values
2217
     *                                   in the input array are filtered by this filter.
2218
     *                                   </p>
2219
     * @param bool           $add_empty  [optional] <p>
2220
     *                                   Add missing keys as <b>NULL</b> to the return value.
2221
     *                                   </p>
2222
     *
2223
     * @psalm-pure
2224
     *
2225
     * @return mixed
2226
     *               <p>
2227
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
2228
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
2229
     *               set.
2230
     *               </p>
2231
     */
2232 2
    public static function filter_var_array(
        array $data,
        $definition = null,
        bool $add_empty = true
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        if (\func_num_args() < 2) {
            // only the data was given: let the native function use its own defaults
            $a = \filter_var_array($data);
        } else {
            // The native filter_var_array() only accepts array|int as definition,
            // while this wrapper documents NULL as allowed: an explicit NULL is
            // therefore mapped to FILTER_DEFAULT instead of being forwarded as-is.
            $a = \filter_var_array($data, $definition ?? \FILTER_DEFAULT, $add_empty);
        }

        // hand the filtered values to self::filter() and return its result
        return self::filter($a);
    }
2248
2249
    /**
2250
     * Checks whether finfo is available on the server.
2251
     *
2252
     * @psalm-pure
2253
     *
2254
     * @return bool
2255
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
2256
     *
2257
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
2258
     */
2259
    public static function finfo_loaded(): bool
    {
        // the "finfo" class only exists when the fileinfo extension is available
        $finfo_class = 'finfo';

        return \class_exists($finfo_class);
    }
2263
2264
    /**
2265
     * Returns the first $n characters of the string.
2266
     *
2267
     * @param string $str      <p>The input string.</p>
2268
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
2269
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2270
     *
2271
     * @psalm-pure
2272
     *
2273
     * @return string
2274
     */
2275 13
    public static function first_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to extract from an empty string or for a non-positive count
        if ($n <= 0 || $str === '') {
            return '';
        }

        // fast path: plain "mb_substr" call for UTF-8, otherwise use the encoding-aware wrapper
        $head = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $head;
    }
2290
2291
    /**
2292
     * Check if the number of Unicode characters isn't greater than the specified integer.
2293
     *
2294
     * EXAMPLE: <code>UTF8::fits_inside('κόσμε', 6); // true</code>
2295
     *
2296
     * @param string $str      the original string to be checked
2297
     * @param int    $box_size the size in number of chars to be checked against string
2298
     *
2299
     * @psalm-pure
2300
     *
2301
     * @return bool
2302
     *              <p><strong>TRUE</strong> if string is less than or equal to $box_size, <strong>FALSE</strong> otherwise.</p>
2303
     */
2304 2
    public static function fits_inside(string $str, int $box_size): bool
    {
        // number of characters as reported by self::strlen(), cast to int
        $char_count = (int) self::strlen($str);

        return $char_count <= $box_size;
    }
2308
2309
    /**
2310
     * Try to fix simple broken UTF-8 strings.
2311
     *
2312
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
2313
     *
2314
     * EXAMPLE: <code>UTF8::fix_simple_utf8('DÃ¼sseldorf'); // 'Düsseldorf'</code>
2315
     *
2316
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
2317
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
2318
     * See: http://en.wikipedia.org/wiki/Windows-1252
2319
     *
2320
     * @param string $str <p>The input string</p>
2321
     *
2322
     * @psalm-pure
2323
     *
2324
     * @return string
2325
     */
2326 47
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // Both lookup tables are built on the first call only and then re-used.

        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $SEARCH_CACHE = null;

        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $REPLACE_CACHE = null;

        if ($SEARCH_CACHE === null) {
            // lazy-load the "broken sequence => replacement" map
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            // keys are the sequences to search for, the map itself supplies the replacements
            $SEARCH_CACHE = \array_keys(self::$BROKEN_UTF8_FIX);
            $REPLACE_CACHE = self::$BROKEN_UTF8_FIX;
        }

        \assert(\is_array($REPLACE_CACHE));

        return \str_replace($SEARCH_CACHE, $REPLACE_CACHE, $str);
    }
2359
2360
    /**
2361
     * Fix a double (or multiple) encoded UTF8 string.
2362
     *
2363
     * EXAMPLE: <code>UTF8::fix_utf8('Fédération'); // 'Fédération'</code>
2364
     *
2365
     * @param string|string[] $str you can use a string or an array of strings
2366
     *
2367
     * @psalm-pure
2368
     *
2369
     * @return string|string[]
2370
     *                         <p>Will return the fixed input-"array" or
2371
     *                         the fixed input-"string".</p>
2372
     *
2373
     * @template TFixUtf8
2374
     * @phpstan-param TFixUtf8 $str
2375
     * @phpstan-return TFixUtf8
2376
     */
2377 2
    public static function fix_utf8($str)
    {
        // arrays are fixed element by element (recursively), keys are kept
        if (\is_array($str)) {
            foreach ($str as $key => $value) {
                $str[$key] = self::fix_utf8($value);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $str = (string) $str;
        if ($str === '') {
            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        // decode + re-encode until the string does not change anymore
        do {
            $previous = $str;
            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::to_utf8(
                self::utf8_decode($previous, true)
            );
        } while ($previous !== $str);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $str;
    }
2408
2409
    /**
2410
     * Get the text direction ('RTL' or 'LTR') of a specific character.
2411
     *
2412
     * EXAMPLE: <code>UTF8::getCharDirection('ا'); // 'RTL'</code>
2413
     *
2414
     * @param string $char
2415
     *
2416
     * @psalm-pure
2417
     *
2418
     * @return string
2419
     *                <p>'RTL' or 'LTR'.</p>
2420
     */
2421 2
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_direction = \IntlChar::charDirection($char);

            // direction values from the "IntlChar"-Class
            if (\in_array($intl_direction, [0, 11, 12, 20], true)) {
                return 'LTR';
            }

            if (\in_array($intl_direction, [1, 13, 14, 15, 21], true)) {
                return 'RTL';
            }

            // any other value: fall through to the code point tables below
        }

        $c = static::chr_to_decimal($char);

        // every RTL entry listed below lies inside this window
        if (!($c >= 0x5be && $c <= 0x10b7f)) {
            return 'LTR';
        }

        // single code points that are RTL
        $rtl_points = [
            0x5be, 0x5c0, 0x5c3, 0x5c6,
            0x608, 0x60b, 0x60d, 0x61b,
            0x710, 0x7b1, 0x7fa,
            0x81a, 0x824, 0x828, 0x85e,
            0x200f,
            0xfb1d, 0xfb3e,
            0x10808, 0x1083c, 0x1093f, 0x10a00,
        ];

        if (\in_array($c, $rtl_points, true)) {
            return 'RTL';
        }

        // inclusive [first, last] code point ranges that are RTL
        $rtl_ranges = [
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            [0x800, 0x815],
            [0x830, 0x83e],
            [0x840, 0x858],
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            [0x10b78, 0x10b7f],
        ];

        foreach ($rtl_ranges as $range) {
            if ($c >= $range[0] && $c <= $range[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2527
2528
    /**
2529
     * Check for php-support.
2530
     *
2531
     * @param string|null $key
2532
     *
2533
     * @psalm-pure
2534
     *
2535
     * @return mixed
2536
     *               Return the full support-"array", if $key === null<br>
2537
     *               return bool-value, if $key is used and available<br>
2538
     *               otherwise return <strong>null</strong>
2539
     */
2540 27
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            // lazy-load the transliterator list on first use
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            // compatibility fix for old versions
            self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

            // unknown keys yield NULL
            return self::$SUPPORT[$key] ?? null;
        }

        // no key requested: return the complete support-"array"
        return self::$SUPPORT;
    }
2554
2555
    /**
2556
     * Warning: this method only works for some file-types (png, jpg)
2557
     *          if you need more supported types, please use e.g. "finfo"
2558
     *
2559
     * @param string $str
2560
     * @param array  $fallback <p>with these keys: 'ext', 'mime', 'type'</p>
2561
     *
2562
     * @psalm-pure
2563
     *
2564
     * @return null[]|string[]
2565
     *                         <p>with this keys: 'ext', 'mime', 'type'</p>
2566
     *
2567
     * @phpstan-param array{ext: null|string, mime: null|string, type: null|string} $fallback
2568
     */
2569 40
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        if ($str === '') {
            return $fallback;
        }

        // only the first two bytes are inspected
        /** @var false|string $signature - needed for PhpStan (stubs error) */
        $signature = \substr($str, 0, 2);
        if ($signature === false || \strlen($signature) !== 2) {
            return $fallback;
        }

        /** @var array|false $bytes - needed for PhpStan (stubs error) */
        $bytes = \unpack('C2chars', $signature);
        if ($bytes === false) {
            return $fallback;
        }

        // decimal values of both bytes, glued together and read as one integer
        /** @noinspection OffsetOperationsInspection */
        $type_code = (int) ($bytes['chars1'] . $bytes['chars2']);

        //
        // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
        //
        // WARNING: do not add too simple comparisons, because of false-positive results:
        //
        // 3780 => 'pdf', 7790 => 'exe', 7784 => 'midi', 8075 => 'zip',
        // 8297 => 'rar', 7173 => 'gif', 7373 => 'tiff' 6677 => 'bmp', ...
        //
        $known_types = [
            255216 => [
                'ext'  => 'jpg',
                'mime' => 'image/jpeg',
                'type' => 'binary',
            ],
            13780 => [
                'ext'  => 'png',
                'mime' => 'image/png',
                'type' => 'binary',
            ],
        ];

        return $known_types[$type_code] ?? $fallback;
    }
2636
2637
    /**
2638
     * @param int    $length         <p>Length of the random string.</p>
2639
     * @param string $possible_chars [optional] <p>Characters string for the random selection.</p>
2640
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
2641
     *
2642
     * @return string
2643
     */
2644 1
    public static function get_random_string(
        int $length,
        string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        string $encoding = 'UTF-8'
    ): string {
        // "UTF-8" uses the plain "mb_*" functions, everything else the encoding-aware wrappers
        $use_mb_directly = $encoding === 'UTF-8';

        if ($use_mb_directly) {
            $pool_size = (int) \mb_strlen($possible_chars);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $pool_size = (int) self::strlen($possible_chars, $encoding);
        }

        // without any possible char there is nothing to pick from
        if ($pool_size === 0) {
            return '';
        }

        //
        // add random chars
        //

        $result = '';
        $picked = 0;
        while ($picked < $length) {
            try {
                $position = \random_int(0, $pool_size - 1);
            } catch (\Exception $e) {
                /** @noinspection RandomApiMigrationInspection */
                $position = \mt_rand(0, $pool_size - 1);
            }

            $char = $use_mb_directly
                ? \mb_substr($possible_chars, $position, 1)
                : self::substr($possible_chars, $position, 1, $encoding);

            // a failed extraction is not counted, the position is simply drawn again
            if ($char !== false) {
                $result .= $char;
                ++$picked;
            }
        }

        return $result;
    }
2701
2702
    /**
2703
     * @param int|string $extra_entropy [optional] <p>Extra entropy via a string or int value.</p>
2704
     * @param bool       $use_md5       [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
2705
     *
2706
     * @return string
2707
     */
2708 1
    public static function get_unique_string($extra_entropy = '', bool $use_md5 = true): string
    {
        try {
            $random_part = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            /** @noinspection RandomApiMigrationInspection */
            $random_part = \mt_rand(0, \mt_getrandmax());
        }

        // random number + session id + remote / server address (if set) + caller supplied entropy
        $unique_helper = \implode(
            '',
            [
                $random_part,
                \session_id(),
                $_SERVER['REMOTE_ADDR'] ?? '',
                $_SERVER['SERVER_ADDR'] ?? '',
                $extra_entropy,
            ]
        );

        $unique_string = \uniqid($unique_helper, true);

        if (!$use_md5) {
            return $unique_string;
        }

        return \md5($unique_string . $unique_helper);
    }
2731
2732
    /**
2733
     * alias for "UTF8::string_has_bom()"
2734
     *
2735
     * @param string $str
2736
     *
2737
     * @psalm-pure
2738
     *
2739
     * @return bool
2740
     *
2741
     * @see        UTF8::string_has_bom()
2742
     * @deprecated <p>please use "UTF8::string_has_bom()"</p>
2743
     */
2744 2
    public static function hasBom(string $str): bool
    {
        // deprecated alias: the work is done by "string_has_bom()"
        $has_bom = self::string_has_bom($str);

        return $has_bom;
    }
2748
2749
    /**
2750
     * Returns true if the string contains a lower case char, false otherwise.
2751
     *
2752
     * @param string $str <p>The input string.</p>
2753
     *
2754
     * @psalm-pure
2755
     *
2756
     * @return bool
2757
     *              <p>Whether or not the string contains a lower case character.</p>
2758
     */
2759 47
    public static function has_lowercase(string $str): bool
    {
        // any chars, followed by one char of the POSIX class "lower"
        $pattern = '.*[[:lower:]]';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2768
2769
    /**
2770
     * Returns true if the string contains whitespace, false otherwise.
2771
     *
2772
     * @param string $str <p>The input string.</p>
2773
     *
2774
     * @psalm-pure
2775
     *
2776
     * @return bool
2777
     *              <p>Whether or not the string contains whitespace.</p>
2778
     */
2779 11
    public static function has_whitespace(string $str): bool
    {
        // any chars, followed by one char of the POSIX class "space"
        $pattern = '.*[[:space:]]';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2788
2789
    /**
2790
     * Returns true if the string contains an upper case char, false otherwise.
2791
     *
2792
     * @param string $str <p>The input string.</p>
2793
     *
2794
     * @psalm-pure
2795
     *
2796
     * @return bool
2797
     *              <p>Whether or not the string contains an upper case character.</p>
2798
     */
2799 12
    public static function has_uppercase(string $str): bool
    {
        // any chars, followed by one char of the POSIX class "upper"
        $pattern = '.*[[:upper:]]';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2808
2809
    /**
2810
     * Converts a hexadecimal value into a UTF-8 character.
2811
     *
2812
     * INFO: opposite to UTF8::chr_to_hex()
2813
     *
2814
     * EXAMPLE: <code>UTF8::hex_to_chr('U+00a7'); // '§'</code>
2815
     *
2816
     * @param string $hexdec <p>The hexadecimal value.</p>
2817
     *
2818
     * @psalm-pure
2819
     *
2820
     * @return false|string one single UTF-8 character
2821
     */
2822 4
    public static function hex_to_chr(string $hexdec)
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection - Invalid characters passed for attempted conversion, these have been ignored */
        $code_point = (int) @\hexdec($hexdec);

        // the decimal code point is converted by "decimal_to_chr()"
        return self::decimal_to_chr($code_point);
    }
2827
2828
    /**
2829
     * Converts hexadecimal U+xxxx code point representation to integer.
2830
     *
2831
     * INFO: opposite to UTF8::int_to_hex()
2832
     *
2833
     * EXAMPLE: <code>UTF8::hex_to_int('U+00f1'); // 241</code>
2834
     *
2835
     * @param string $hexdec <p>The hexadecimal code point representation.</p>
2836
     *
2837
     * @psalm-pure
2838
     *
2839
     * @return false|int
2840
     *                   <p>The code point, or false on failure.</p>
2841
     */
2842 2
    public static function hex_to_int($hexdec)
    {
        // init
        $hexdec = (string) $hexdec;

        if ($hexdec === '') {
            return false;
        }

        // Optional "\u" or "U+" prefix, followed by 4 to 6 hexadecimal digits.
        // Only real hex digits (0-9, a-f, A-F) are accepted, so that input like
        // "U+zzzz" is reported as a failure instead of being converted to 0.
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexdec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2857
2858
    /**
2859
     * alias for "UTF8::html_entity_decode()"
2860
     *
2861
     * @param string   $str
2862
     * @param int|null $flags
2863
     * @param string   $encoding
2864
     *
2865
     * @psalm-pure
2866
     *
2867
     * @return string
2868
     *
2869
     * @see        UTF8::html_entity_decode()
2870
     * @deprecated <p>please use "UTF8::html_entity_decode()"</p>
2871
     */
2872 2
    public static function html_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // deprecated alias: all arguments are passed through unchanged
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2879
2880
    /**
2881
     * Converts a UTF-8 string to a series of HTML numbered entities.
2882
     *
2883
     * INFO: opposite to UTF8::html_decode()
2884
     *
2885
     * EXAMPLE: <code>UTF8::html_encode('中文空白'); // '&#20013;&#25991;&#31354;&#30333;'</code>
2886
     *
2887
     * @param string $str              <p>The Unicode string to be encoded as numbered entities.</p>
2888
     * @param bool   $keep_ascii_chars [optional] <p>Keep ASCII chars.</p>
2889
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
2890
     *
2891
     * @psalm-pure
2892
     *
2893
     * @return string HTML numbered entities
2894
     */
2895 14
    public static function html_encode(
        string $str,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // with "keep ASCII" the conversion starts at the first non-ASCII code point
            $start_code = $keep_ascii_chars ? 0x80 : 0x00;

            // conversion map: [first code point, last code point, offset, mask]
            // NOTE(review): the map ends at 0xfffff, so code points above that value
            // look like they are left untouched - confirm that this is intended.
            $convmap = [$start_code, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                // first try without an explicit encoding argument
                /** @var false|string|null $encoded - needed for PhpStan (stubs error) */
                $encoded = \mb_encode_numericentity($str, $convmap);
                if ($encoded !== null && $encoded !== false) {
                    return $encoded;
                }
            }

            /** @var false|string|null $encoded - needed for PhpStan (stubs error) */
            $encoded = \mb_encode_numericentity($str, $convmap, $encoding);
            if ($encoded !== null && $encoded !== false) {
                return $encoded;
            }
        }

        //
        // fallback via vanilla php
        //

        $encode_single_char = static function (string $chr) use ($keep_ascii_chars, $encoding): string {
            return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
        };

        return \implode('', \array_map($encode_single_char, self::str_split($str)));
    }
2952
2953
    /**
2954
     * UTF-8 version of html_entity_decode()
2955
     *
2956
     * The reason we are not using html_entity_decode() by itself is because
2957
     * while it is not technically correct to leave out the semicolon
2958
     * at the end of an entity most browsers will still interpret the entity
2959
     * correctly. html_entity_decode() does not convert entities without
2960
     * semicolons, so we are left with our own little solution here. Bummer.
2961
     *
2962
     * Convert all HTML entities to their applicable characters.
2963
     *
2964
     * INFO: opposite to UTF8::html_encode()
2965
     *
2966
     * EXAMPLE: <code>UTF8::html_entity_decode('&#20013;&#25991;&#31354;&#30333;'); // '中文空白'</code>
2967
     *
2968
     * @see http://php.net/manual/en/function.html-entity-decode.php
2969
     *
2970
     * @param string   $str      <p>
2971
     *                           The input string.
2972
     *                           </p>
2973
     * @param int|null $flags    [optional] <p>
2974
     *                           A bitmask of one or more of the following flags, which specify how to handle quotes
2975
     *                           and which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2976
     *                           <table>
2977
     *                           Available <i>flags</i> constants
2978
     *                           <tr valign="top">
2979
     *                           <td>Constant Name</td>
2980
     *                           <td>Description</td>
2981
     *                           </tr>
2982
     *                           <tr valign="top">
2983
     *                           <td><b>ENT_COMPAT</b></td>
2984
     *                           <td>Will convert double-quotes and leave single-quotes alone.</td>
2985
     *                           </tr>
2986
     *                           <tr valign="top">
2987
     *                           <td><b>ENT_QUOTES</b></td>
2988
     *                           <td>Will convert both double and single quotes.</td>
2989
     *                           </tr>
2990
     *                           <tr valign="top">
2991
     *                           <td><b>ENT_NOQUOTES</b></td>
2992
     *                           <td>Will leave both double and single quotes unconverted.</td>
2993
     *                           </tr>
2994
     *                           <tr valign="top">
2995
     *                           <td><b>ENT_HTML401</b></td>
2996
     *                           <td>
2997
     *                           Handle code as HTML 4.01.
2998
     *                           </td>
2999
     *                           </tr>
3000
     *                           <tr valign="top">
3001
     *                           <td><b>ENT_XML1</b></td>
3002
     *                           <td>
3003
     *                           Handle code as XML 1.
3004
     *                           </td>
3005
     *                           </tr>
3006
     *                           <tr valign="top">
3007
     *                           <td><b>ENT_XHTML</b></td>
3008
     *                           <td>
3009
     *                           Handle code as XHTML.
3010
     *                           </td>
3011
     *                           </tr>
3012
     *                           <tr valign="top">
3013
     *                           <td><b>ENT_HTML5</b></td>
3014
     *                           <td>
3015
     *                           Handle code as HTML 5.
3016
     *                           </td>
3017
     *                           </tr>
3018
     *                           </table>
3019
     *                           </p>
3020
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
3021
     *
3022
     * @psalm-pure
3023
     *
3024
     * @return string the decoded string
3025
     */
3026 51
    public static function html_entity_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // Fast exit: strings shorter than four bytes (e.g. "&;" or "&x;") and
        // strings without any ampersand are returned untouched.
        if (!isset($str[3]) || \strpos($str, '&') === false) {
            return $str;
        }

        // "UTF-8" and "CP850" are used verbatim; anything else is normalized first.
        $is_known_encoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // No explicit flags: convert both quote styles and handle the input as HTML 5.
        $flags = $flags ?? (\ENT_QUOTES | \ENT_HTML5);

        $is_directly_supported = (
            $encoding === 'UTF-8'
            ||
            $encoding === 'ISO-8859-1'
            ||
            $encoding === 'WINDOWS-1252'
        );
        if (!$is_directly_supported && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // Matches numeric entities (decimal or hexadecimal) that lack the closing
        // semicolon, so that one can be appended before the native decoder runs.
        $missing_semicolon_pattern = '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S';

        // Decode repeatedly until a pass no longer changes the string, so that
        // nested encodings such as "&amp;lt;" are resolved completely.
        do {
            $before_pass = $str;

            if (\strpos($str, '&') === false) {
                // nothing left that could be an entity; $str is unchanged, so we are done
                break;
            }

            if (\strpos($str, '&#') !== false) {
                $str = (string) \preg_replace($missing_semicolon_pattern, '$1;', $str);
            }

            $str = \html_entity_decode($str, $flags, $encoding);
        } while ($before_pass !== $str);

        return $str;
    }
3086
3087
    /**
     * Create an HTML-escaped version of the string via "UTF8::htmlspecialchars()".
     *
     * The call uses ENT_QUOTES (both double and single quotes are converted) together
     * with ENT_SUBSTITUTE (invalid code unit sequences are replaced instead of
     * producing an empty string).
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        $escape_flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $escape_flags, $encoding);
    }
3105
3106
    /**
     * Remove empty html-tags, i.e. an opening tag that is directly followed
     * (optionally separated by whitespace) by a closing tag.
     *
     * e.g.: <pre><tag></tag></pre>
     *
     * NOTE(review): the pattern does not require the names of the opening and the
     * closing tag to match, so a sequence like "<br></p>" is removed as well -
     * confirm whether callers rely on that.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string without empty tags, or the unchanged input if the
     *                regular expression could not be applied (e.g. invalid UTF-8).</p>
     */
    public static function html_stripe_empty_tags(string $str): string
    {
        $stripped = \preg_replace(
            '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u',
            '',
            $str
        );

        // preg_replace() returns null on failure (the "u" modifier rejects invalid
        // UTF-8 subjects); casting that to string would silently discard the whole
        // input, so fall back to the original string instead.
        return $stripped ?? $str;
    }
3125
3126
    /**
3127
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities().
3128
     *
3129
     * EXAMPLE: <code>UTF8::htmlentities('<白-öäü>'); // '&lt;&#30333;-&ouml;&auml;&uuml;&gt;'</code>
3130
     *
3131
     * @see http://php.net/manual/en/function.htmlentities.php
3132
     *
3133
     * @param string $str           <p>
3134
     *                              The input string.
3135
     *                              </p>
3136
     * @param int    $flags         [optional] <p>
3137
     *                              A bitmask of one or more of the following flags, which specify how to handle
3138
     *                              quotes, invalid code unit sequences and the used document type. The default is
3139
     *                              ENT_COMPAT | ENT_HTML401.
3140
     *                              <table>
3141
     *                              Available <i>flags</i> constants
3142
     *                              <tr valign="top">
3143
     *                              <td>Constant Name</td>
3144
     *                              <td>Description</td>
3145
     *                              </tr>
3146
     *                              <tr valign="top">
3147
     *                              <td><b>ENT_COMPAT</b></td>
3148
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3149
     *                              </tr>
3150
     *                              <tr valign="top">
3151
     *                              <td><b>ENT_QUOTES</b></td>
3152
     *                              <td>Will convert both double and single quotes.</td>
3153
     *                              </tr>
3154
     *                              <tr valign="top">
3155
     *                              <td><b>ENT_NOQUOTES</b></td>
3156
     *                              <td>Will leave both double and single quotes unconverted.</td>
3157
     *                              </tr>
3158
     *                              <tr valign="top">
3159
     *                              <td><b>ENT_IGNORE</b></td>
3160
     *                              <td>
3161
     *                              Silently discard invalid code unit sequences instead of returning
3162
     *                              an empty string. Using this flag is discouraged as it
3163
     *                              may have security implications.
3164
     *                              </td>
3165
     *                              </tr>
3166
     *                              <tr valign="top">
3167
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3168
     *                              <td>
3169
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3170
     *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty
3171
     *                              string.
3172
     *                              </td>
3173
     *                              </tr>
3174
     *                              <tr valign="top">
3175
     *                              <td><b>ENT_DISALLOWED</b></td>
3176
     *                              <td>
3177
     *                              Replace invalid code points for the given document type with a
3178
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
3179
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3180
     *                              instance, to ensure the well-formedness of XML documents with
3181
     *                              embedded external content.
3182
     *                              </td>
3183
     *                              </tr>
3184
     *                              <tr valign="top">
3185
     *                              <td><b>ENT_HTML401</b></td>
3186
     *                              <td>
3187
     *                              Handle code as HTML 4.01.
3188
     *                              </td>
3189
     *                              </tr>
3190
     *                              <tr valign="top">
3191
     *                              <td><b>ENT_XML1</b></td>
3192
     *                              <td>
3193
     *                              Handle code as XML 1.
3194
     *                              </td>
3195
     *                              </tr>
3196
     *                              <tr valign="top">
3197
     *                              <td><b>ENT_XHTML</b></td>
3198
     *                              <td>
3199
     *                              Handle code as XHTML.
3200
     *                              </td>
3201
     *                              </tr>
3202
     *                              <tr valign="top">
3203
     *                              <td><b>ENT_HTML5</b></td>
3204
     *                              <td>
3205
     *                              Handle code as HTML 5.
3206
     *                              </td>
3207
     *                              </tr>
3208
     *                              </table>
3209
     *                              </p>
3210
     * @param string $encoding      [optional] <p>
3211
     *                              Like <b>htmlspecialchars</b>,
3212
     *                              <b>htmlentities</b> takes an optional third argument
3213
     *                              <i>encoding</i> which defines encoding used in
3214
     *                              conversion.
3215
     *                              Although this argument is technically optional, you are highly
3216
     *                              encouraged to specify the correct value for your code.
3217
     *                              </p>
3218
     * @param bool   $double_encode [optional] <p>
3219
     *                              When <i>double_encode</i> is turned off PHP will not
3220
     *                              encode existing html entities. The default is to convert everything.
3221
     *                              </p>
3222
     *
3223
     * @psalm-pure
3224
     *
3225
     * @return string
3226
     *                <p>
3227
     *                The encoded string.
3228
     *                <br><br>
3229
     *                If the input <i>string</i> contains an invalid code unit
3230
     *                sequence within the given <i>encoding</i> an empty string
3231
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3232
     *                <b>ENT_SUBSTITUTE</b> flags are set.
3233
     *                </p>
3234
     */
3235 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used verbatim; anything else is normalized first.
        $needs_normalization = !($encoding === 'UTF-8' || $encoding === 'CP850');
        if ($needs_normalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        /**
         * The native htmlentities() leaves the backslash alone, as it is mostly used
         * to escape characters for database inserts. That escaping is not needed
         * here, so every backslash is replaced by its numeric html entity.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace(
            '\\',
            '&#92;',
            \htmlentities($str, $flags, $encoding, $double_encode)
        );

        // Final pass through UTF8::html_encode(); the meaning of the "true"
        // argument is defined by that method.
        return self::html_encode($encoded, true, $encoding);
    }
3264
3265
    /**
3266
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
3267
     *
3268
     * INFO: Take a look at "UTF8::htmlentities()"
3269
     *
3270
     * EXAMPLE: <code>UTF8::htmlspecialchars('<白-öäü>'); // '&lt;白-öäü&gt;'</code>
3271
     *
3272
     * @see http://php.net/manual/en/function.htmlspecialchars.php
3273
     *
3274
     * @param string $str           <p>
3275
     *                              The string being converted.
3276
     *                              </p>
3277
     * @param int    $flags         [optional] <p>
3278
     *                              A bitmask of one or more of the following flags, which specify how to handle
3279
     *                              quotes, invalid code unit sequences and the used document type. The default is
3280
     *                              ENT_COMPAT | ENT_HTML401.
3281
     *                              <table>
3282
     *                              Available <i>flags</i> constants
3283
     *                              <tr valign="top">
3284
     *                              <td>Constant Name</td>
3285
     *                              <td>Description</td>
3286
     *                              </tr>
3287
     *                              <tr valign="top">
3288
     *                              <td><b>ENT_COMPAT</b></td>
3289
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3290
     *                              </tr>
3291
     *                              <tr valign="top">
3292
     *                              <td><b>ENT_QUOTES</b></td>
3293
     *                              <td>Will convert both double and single quotes.</td>
3294
     *                              </tr>
3295
     *                              <tr valign="top">
3296
     *                              <td><b>ENT_NOQUOTES</b></td>
3297
     *                              <td>Will leave both double and single quotes unconverted.</td>
3298
     *                              </tr>
3299
     *                              <tr valign="top">
3300
     *                              <td><b>ENT_IGNORE</b></td>
3301
     *                              <td>
3302
     *                              Silently discard invalid code unit sequences instead of returning
3303
     *                              an empty string. Using this flag is discouraged as it
3304
     *                              may have security implications.
3305
     *                              </td>
3306
     *                              </tr>
3307
     *                              <tr valign="top">
3308
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3309
     *                              <td>
3310
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3311
     *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty
3312
     *                              string.
3313
     *                              </td>
3314
     *                              </tr>
3315
     *                              <tr valign="top">
3316
     *                              <td><b>ENT_DISALLOWED</b></td>
3317
     *                              <td>
3318
     *                              Replace invalid code points for the given document type with a
3319
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
3320
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3321
     *                              instance, to ensure the well-formedness of XML documents with
3322
     *                              embedded external content.
3323
     *                              </td>
3324
     *                              </tr>
3325
     *                              <tr valign="top">
3326
     *                              <td><b>ENT_HTML401</b></td>
3327
     *                              <td>
3328
     *                              Handle code as HTML 4.01.
3329
     *                              </td>
3330
     *                              </tr>
3331
     *                              <tr valign="top">
3332
     *                              <td><b>ENT_XML1</b></td>
3333
     *                              <td>
3334
     *                              Handle code as XML 1.
3335
     *                              </td>
3336
     *                              </tr>
3337
     *                              <tr valign="top">
3338
     *                              <td><b>ENT_XHTML</b></td>
3339
     *                              <td>
3340
     *                              Handle code as XHTML.
3341
     *                              </td>
3342
     *                              </tr>
3343
     *                              <tr valign="top">
3344
     *                              <td><b>ENT_HTML5</b></td>
3345
     *                              <td>
3346
     *                              Handle code as HTML 5.
3347
     *                              </td>
3348
     *                              </tr>
3349
     *                              </table>
3350
     *                              </p>
3351
     * @param string $encoding      [optional] <p>
3352
     *                              Defines encoding used in conversion.
3353
     *                              </p>
3354
     *                              <p>
3355
     *                              For the purposes of this function, the encodings
3356
     *                              ISO-8859-1, ISO-8859-15,
3357
     *                              UTF-8, cp866,
3358
     *                              cp1251, cp1252, and
3359
     *                              KOI8-R are effectively equivalent, provided the
3360
     *                              <i>string</i> itself is valid for the encoding, as
3361
     *                              the characters affected by <b>htmlspecialchars</b> occupy
3362
     *                              the same positions in all of these encodings.
3363
     *                              </p>
3364
     * @param bool   $double_encode [optional] <p>
3365
     *                              When <i>double_encode</i> is turned off PHP will not
3366
     *                              encode existing html entities, the default is to convert everything.
3367
     *                              </p>
3368
     *
3369
     * @psalm-pure
3370
     *
3371
     * @return string the converted string.
3372
     *                </p>
3373
     *                <p>
3374
     *                If the input <i>string</i> contains an invalid code unit
3375
     *                sequence within the given <i>encoding</i> an empty string
3376
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3377
     *                <b>ENT_SUBSTITUTE</b> flags are set
3378
     */
3379 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are passed on verbatim; anything else is normalized first.
        $effective_encoding = ($encoding === 'UTF-8' || $encoding === 'CP850')
            ? $encoding
            : self::normalize_encoding($encoding, 'UTF-8');

        return \htmlspecialchars($str, $flags, $effective_encoding, $double_encode);
    }
3396
3397
    /**
     * Reports whether the "iconv" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function iconv_loaded(): bool
    {
        $is_loaded = \extension_loaded('iconv');

        return $is_loaded;
    }
3411
3412
    /**
     * Deprecated alias: forwards the argument to "UTF8::decimal_to_chr()".
     *
     * @param int|string $int
     *
     * @phpstan-param int|numeric-string $int
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::decimal_to_chr()
     * @deprecated <p>please use "UTF8::decimal_to_chr()"</p>
     */
    public static function int_to_chr($int): string
    {
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
3430
3431
    /**
     * Converts an integer to its hexadecimal code point representation (e.g. U+xxxx).
     *
     * The hexadecimal digits are zero-padded on the left to at least four digits;
     * longer values are kept as they are.
     *
     * INFO: opposite to UTF8::hex_to_int()
     *
     * EXAMPLE: <code>UTF8::int_to_hex(241); // 'U+00f1'</code>
     *
     * @param int    $int    <p>The integer to be converted to hexadecimal code point.</p>
     * @param string $prefix [optional] <p>Text put in front of the hexadecimal digits.</p>
     *
     * @psalm-pure
     *
     * @return string the prefix followed by the hexadecimal digits
     */
    public static function int_to_hex(int $int, string $prefix = 'U+'): string
    {
        // left-pad with zeros up to four digits, longer values pass through unchanged
        $hex_digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $prefix . $hex_digits;
    }
3453
3454
    /**
     * Reports whether the "IntlChar" class exists.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function intlChar_loaded(): bool
    {
        $is_available = \class_exists('IntlChar');

        return $is_available;
    }
3468
3469
    /**
     * Reports whether the "intl" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function intl_loaded(): bool
    {
        $is_loaded = \extension_loaded('intl');

        return $is_loaded;
    }
3483
3484
    /**
     * Deprecated alias for "UTF8::is_ascii()"; forwards to "ASCII::is_ascii()".
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::is_ascii()
     * @deprecated <p>please use "UTF8::is_ascii()"</p>
     */
    public static function isAscii(string $str): bool
    {
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
3500
3501
    /**
     * Deprecated alias: forwards the argument to "UTF8::is_base64()".
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::is_base64()
     * @deprecated <p>please use "UTF8::is_base64()"</p>
     */
    public static function isBase64($str): bool
    {
        $is_base64 = self::is_base64($str);

        return $is_base64;
    }
3517
3518
    /**
     * Deprecated alias: forwards both arguments to "UTF8::is_binary()".
     *
     * @param int|string $str
     * @param bool       $strict
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::is_binary()
     * @deprecated <p>please use "UTF8::is_binary()"</p>
     */
    public static function isBinary($str, bool $strict = false): bool
    {
        $is_binary = self::is_binary($str, $strict);

        return $is_binary;
    }
3535
3536
    /**
     * Deprecated alias: forwards the argument to "UTF8::is_bom()".
     *
     * @param string $utf8_chr
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::is_bom()
     * @deprecated <p>please use "UTF8::is_bom()"</p>
     */
    public static function isBom(string $utf8_chr): bool
    {
        $is_bom = self::is_bom($utf8_chr);

        return $is_bom;
    }
3552
3553
    /**
     * Deprecated alias: forwards the argument to "UTF8::is_html()".
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::is_html()
     * @deprecated <p>please use "UTF8::is_html()"</p>
     */
    public static function isHtml(string $str): bool
    {
        $is_html = self::is_html($str);

        return $is_html;
    }
3569
3570
    /**
     * Deprecated alias: forwards the argument to "UTF8::is_json()".
     *
     * @param string $str
     *
     * @return bool
     *
     * @see        UTF8::is_json()
     * @deprecated <p>please use "UTF8::is_json()"</p>
     */
    public static function isJson(string $str): bool
    {
        $is_json = self::is_json($str);

        return $is_json;
    }
3584
3585
    /**
     * Deprecated alias: forwards the argument to "UTF8::is_utf16()".
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     *
     * @see        UTF8::is_utf16()
     * @deprecated <p>please use "UTF8::is_utf16()"</p>
     */
    public static function isUtf16($str)
    {
        $utf16_type = self::is_utf16($str);

        return $utf16_type;
    }
3604
3605
    /**
     * Deprecated alias: forwards the argument to "UTF8::is_utf32()".
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,
     *                   <strong>1</strong> for UTF-32LE,
     *                   <strong>2</strong> for UTF-32BE
     *
     * @see        UTF8::is_utf32()
     * @deprecated <p>please use "UTF8::is_utf32()"</p>
     */
    public static function isUtf32($str)
    {
        $utf32_type = self::is_utf32($str);

        return $utf32_type;
    }
3624
3625
    /**
     * Deprecated alias: forwards both arguments to "UTF8::is_utf8()".
     *
     * @param string $str
     * @param bool   $strict
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::is_utf8()
     * @deprecated <p>please use "UTF8::is_utf8()"</p>
     */
    public static function isUtf8($str, bool $strict = false): bool
    {
        $is_utf8 = self::is_utf8($str, $strict);

        return $is_utf8;
    }
3642
3643
    /**
     * Returns true if the string contains only alphabetic chars, false otherwise.
     *
     * The check uses mb_ereg_match() when mbstring support is flagged as available
     * and UTF8::str_matches_pattern() otherwise, both with the same pattern.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only alphabetic chars.</p>
     */
    public static function is_alpha(string $str): bool
    {
        $pattern = '^[[:alpha:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3662
3663
    /**
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
     *
     * The check uses mb_ereg_match() when mbstring support is flagged as available
     * and UTF8::str_matches_pattern() otherwise, both with the same pattern.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only alphanumeric chars.</p>
     */
    public static function is_alphanumeric(string $str): bool
    {
        $pattern = '^[[:alnum:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3682
3683
    /**
     * Returns true if the string contains only punctuation chars, false otherwise.
     *
     * The check is delegated to UTF8::str_matches_pattern() with a "[[:punct:]]" pattern.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only punctuation chars.</p>
     */
    public static function is_punctuation(string $str): bool
    {
        $pattern = '^[[:punct:]]*$';

        return self::str_matches_pattern($str, $pattern);
    }
3697
3698
    /**
     * Returns true if the string contains only printable (non-invisible) chars, false otherwise.
     *
     * The string counts as printable when UTF8::remove_invisible_characters()
     * returns it unchanged.
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $ignore_control_characters [optional] <p>Ignore control characters like [LRM] or [LSEP].</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only printable (non-invisible) chars.</p>
     */
    public static function is_printable(string $str, bool $ignore_control_characters = false): bool
    {
        $without_invisible_chars = self::remove_invisible_characters(
            $str,
            false,
            '',
            $ignore_control_characters
        );

        return $without_invisible_chars === $str;
    }
3713
3714
    /**
     * Checks if a string is 7 bit ASCII; the check is delegated to "ASCII::is_ascii()".
     *
     * EXAMPLE: <code>UTF8::is_ascii('白'); // false</code>
     *
     * @param string $str <p>The string to check.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if it is ASCII<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function is_ascii(string $str): bool
    {
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
3733
3734
    /**
     * Returns true if the string is base64 encoded, false otherwise.
     *
     * The input is decoded in strict mode and encoded again; it only counts as
     * base64 when that round trip reproduces the input exactly. Non-string input
     * is never valid.
     *
     * EXAMPLE: <code>UTF8::is_base64('4KSu4KWL4KSo4KS/4KSa'); // true</code>
     *
     * @param string|null $str                   <p>The input string.</p>
     * @param bool        $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is base64 encoded.</p>
     */
    public static function is_base64($str, bool $empty_string_is_valid = false): bool
    {
        if (!\is_string($str)) {
            return false;
        }

        if ($str === '' && !$empty_string_is_valid) {
            return false;
        }

        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // the round trip must reproduce the input byte for byte
        return \base64_encode($decoded) === $str;
    }
3765
3766
    /**
3767
     * Check if the input is binary... (it looks like a hack).
3768
     *
3769
     * EXAMPLE: <code>UTF8::is_binary(01); // true</code>
3770
     *
3771
     * @param int|string $input
3772
     * @param bool       $strict
3773
     *
3774
     * @psalm-pure
3775
     *
3776
     * @return bool
3777
     */
3778 40
    public static function is_binary($input, bool $strict = false): bool
    {
        $input = (string) $input;
        if ($input === '') {
            return false;
        }

        // A string made up solely of "0" and "1" is treated as binary notation.
        if (\preg_match('~^[01]+$~', $input)) {
            return true;
        }

        // Ask the file-type detection whether it classifies the data as binary.
        $file_type_info = self::get_file_type($input);
        if ($file_type_info['type'] === 'binary') {
            return true;
        }

        // Heuristic: more than 25% NUL bytes means binary data.
        $byte_length = \strlen($input);
        $null_byte_count = \substr_count($input, "\x0", 0, $byte_length);
        if (($null_byte_count / $byte_length) > 0.25) {
            return true;
        }

        // Without strict mode there is nothing left to check.
        if (!$strict) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /**
         * @noinspection   PhpComposerExtensionStubsInspection
         * @psalm-suppress ImpureMethodCall - it will return the same result for the same file ...
         */
        $detected_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

        // finfo reports the MIME encoding "binary" for non-text buffers.
        return $detected_encoding === 'binary';
    }
3817
3818
    /**
3819
     * Check if the file is binary.
3820
     *
3821
     * EXAMPLE: <code>UTF8::is_binary_file('./utf32.txt'); // true</code>
3822
     *
3823
     * @param string $file
3824
     *
3825
     * @return bool
3826
     */
3827 6
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (!\is_resource($handle)) {
            // The file could not be opened, so there is nothing to inspect.
            return false;
        }

        // Only the first 512 bytes are sampled for the binary check.
        $head = \fread($handle, 512);
        \fclose($handle);

        // An empty file or a failed read is not treated as binary.
        if ($head === '' || $head === false) {
            return false;
        }

        return self::is_binary($head, true);
    }
3844
3845
    /**
3846
     * Returns true if the string contains only whitespace chars, false otherwise.
3847
     *
3848
     * @param string $str <p>The input string.</p>
3849
     *
3850
     * @psalm-pure
3851
     *
3852
     * @return bool
3853
     *              <p>Whether or not $str contains only whitespace characters.</p>
3854
     */
3855 15
    public static function is_blank(string $str): bool
    {
        // Matches the empty string or a string made of whitespace only.
        $whitespace_only_pattern = '^[[:space:]]*$';

        // Without mbstring, fall back to the library's own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $whitespace_only_pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($whitespace_only_pattern, $str);
    }
3864
3865
    /**
3866
     * Checks if the given string is equal to any "Byte Order Mark".
3867
     *
3868
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
3869
     *
3870
     * EXAMPLE: <code>UTF8::is_bom("\xef\xbb\xbf"); // true</code>
3871
     *
3872
     * @param string $str <p>The input string.</p>
3873
     *
3874
     * @psalm-pure
3875
     *
3876
     * @return bool
3877
     *              <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
3878
     */
3879 2
    public static function is_bom($str): bool
    {
        // The candidate BOM byte sequences are the *keys* of self::$BOM; the
        // values are not needed here. The keys are compared strictly, so a
        // non-string $str can never match a BOM string.
        //
        // The previous implementation iterated self::$BOM by reference, which
        // turned the elements of the shared static array into references as
        // a side effect of a method that is declared pure. A plain strict
        // key lookup has the same result without touching shared state.
        return \in_array($str, \array_keys(self::$BOM), true);
    }
3890
3891
    /**
3892
     * Determine whether the string is considered to be empty.
3893
     *
3894
     * A variable is considered empty if it does not exist or if its value equals FALSE.
3895
     * empty() does not generate a warning if the variable does not exist.
3896
     *
3897
     * @param array|float|int|string $str
3898
     *
3899
     * @psalm-pure
3900
     *
3901
     * @return bool
3902
     *              <p>Whether or not $str is empty().</p>
3903
     */
3904 1
    public static function is_empty($str): bool
    {
        // For an existing variable, empty() is exactly the negated boolean
        // cast: '', '0', 0, 0.0, [], null and false are all "empty".
        return !$str;
    }
3908
3909
    /**
3910
     * Returns true if the string contains only hexadecimal chars, false otherwise.
3911
     *
3912
     * @param string $str <p>The input string.</p>
3913
     *
3914
     * @psalm-pure
3915
     *
3916
     * @return bool
3917
     *              <p>Whether or not $str contains only hexadecimal chars.</p>
3918
     */
3919 13
    public static function is_hexadecimal(string $str): bool
    {
        // Matches the empty string or a string made of hex digits only.
        $hex_only_pattern = '^[[:xdigit:]]*$';

        // Without mbstring, fall back to the library's own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $hex_only_pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($hex_only_pattern, $str);
    }
3928
3929
    /**
3930
     * Check if the string contains any HTML tags.
3931
     *
3932
     * EXAMPLE: <code>UTF8::is_html('<b>lall</b>'); // true</code>
3933
     *
3934
     * @param string $str <p>The input string.</p>
3935
     *
3936
     * @psalm-pure
3937
     *
3938
     * @return bool
3939
     *              <p>Whether or not $str contains html elements.</p>
3940
     */
3941 3
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // hack for emoji support :/ - the string is emoji-encoded before the
        // regex below is applied.
        $encoded = self::emoji_encode($str);

        // Look for an opening, closing or self-closing tag, optionally with
        // attributes (unquoted, single-quoted or double-quoted values).
        $tag_matches = [];
        \preg_match("/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u", $encoded, $tag_matches);

        // Any captured match means at least one tag was found.
        return $tag_matches !== [];
    }
3956
3957
    /**
3958
     * Check if $url is a correct URL.
3959
     *
3960
     * @param string $url
3961
     * @param bool   $disallow_localhost
3962
     *
3963
     * @psalm-pure
3964
     *
3965
     * @return bool
3966
     */
3967 1
    public static function is_url(string $url, bool $disallow_localhost = false): bool
    {
        if ($url === '') {
            return false;
        }

        // WARNING: keep this as hack protection
        // (only http:// and https:// URLs are ever accepted)
        if (!self::str_istarts_with_any($url, ['http://', 'https://'])) {
            return false;
        }

        // e.g. -> the server itself connect to "https://foo.localhost/phpmyadmin/...
        if ($disallow_localhost) {
            if (self::str_istarts_with_any(
                $url,
                [
                    'http://localhost',
                    'https://localhost',
                    'http://127.0.0.1',
                    'https://127.0.0.1',
                    'http://::1',
                    'https://::1',
                    // In a URL an IPv6 literal has to be wrapped in brackets,
                    // so the real-world form of the IPv6 loopback address is
                    // "[::1]". Without these entries such a URL was not
                    // blocked here and fell through to filter_var() below.
                    'http://[::1]',
                    'https://[::1]',
                ]
            )) {
                return false;
            }

            // Reject any URL that contains ".localhost" (e.g. "foo.localhost").
            $regex = '/^(?:http(?:s)?:\/\/).*?(?:\.localhost)/iu';
            /** @noinspection BypassedUrlValidationInspection */
            if (\preg_match($regex, $url)) {
                return false;
            }
        }

        // INFO: this is needed for e.g. "http://müller.de/" (internationalized domain names) and non ASCII-parameters
        /** @noinspection SuspiciousAssignmentsInspection - false-positive - https://github.com/kalessil/phpinspectionsea/issues/1500 */
        $regex = '/^(?:http(?:s)?:\\/\\/)(?:[\p{L}0-9][\p{L}0-9_-]*(?:\\.[\p{L}0-9][\p{L}0-9_-]*))(?:\\d+)?(?:\\/\\.*)?/iu';
        /** @noinspection BypassedUrlValidationInspection */
        if (\preg_match($regex, $url)) {
            return true;
        }

        // Everything else is left to PHP's built-in URL validation.
        /** @noinspection BypassedUrlValidationInspection */
        return \filter_var($url, \FILTER_VALIDATE_URL) !== false;
    }
4012
4013
    /**
4014
     * Try to check if "$str" is a JSON-string.
4015
     *
4016
     * EXAMPLE: <code>UTF8::is_json('{"array":[1,"¥","ä"]}'); // true</code>
4017
     *
4018
     * @param string $str                                    <p>The input string.</p>
4019
     * @param bool   $only_array_or_object_results_are_valid [optional] <p>Only array and objects are valid json
4020
     *                                                       results.</p>
4021
     *
4022
     * @return bool
4023
     *              <p>Whether or not the $str is in JSON format.</p>
4024
     */
4025 42
    public static function is_json(string $str, bool $only_array_or_object_results_are_valid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // A null result means "could not decode" unless the input itself
        // spells null (compared case-insensitively).
        if ($decoded === null && \strtoupper($str) !== 'NULL') {
            return false;
        }

        // Optionally reject results that are neither an object nor an array.
        $is_container = \is_object($decoded) || \is_array($decoded);
        if ($only_array_or_object_results_are_valid && !$is_container) {
            return false;
        }

        // Finally, the decoder must not have reported an error.
        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
4053
4054
    /**
4055
     * @param string $str <p>The input string.</p>
4056
     *
4057
     * @psalm-pure
4058
     *
4059
     * @return bool
4060
     *              <p>Whether or not $str contains only lowercase chars.</p>
4061
     */
4062 8
    public static function is_lowercase(string $str): bool
    {
        // Matches the empty string or a string made of lowercase chars only.
        $lowercase_only_pattern = '^[[:lower:]]*$';

        // Without mbstring, fall back to the library's own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $lowercase_only_pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($lowercase_only_pattern, $str);
    }
4071
4072
    /**
4073
     * Returns true if the string is serialized, false otherwise.
4074
     *
4075
     * @param string $str <p>The input string.</p>
4076
     *
4077
     * @psalm-pure
4078
     *
4079
     * @return bool
4080
     *              <p>Whether or not $str is serialized.</p>
4081
     */
4082 7
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // unserialize() returns false both for invalid input and for a
        // serialized boolean false, so that one valid payload is special-cased.
        if ($str === 'b:0;') {
            return true;
        }

        // SECURITY: this method only answers "is it serialized?", so it must
        // never instantiate classes from (possibly untrusted) input. With
        // "allowed_classes => false" objects are restored as
        // __PHP_Incomplete_Class, i.e. no __wakeup() / __destruct() code of
        // application classes runs, while valid payloads still yield a
        // non-false result.
        //
        // The silence operator is kept on purpose: unserialize() reports
        // invalid input via a notice/warning, which is the expected "no" case.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        /** @noinspection UnserializeExploitsInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
4094
4095
    /**
4096
     * Returns true if the string contains only upper case chars, false
4097
     * otherwise.
4098
     *
4099
     * @param string $str <p>The input string.</p>
4100
     *
4101
     * @psalm-pure
4102
     *
4103
     * @return bool
4104
     *              <p>Whether or not $str contains only upper case characters.</p>
4105
     */
4106 8
    public static function is_uppercase(string $str): bool
    {
        // Matches the empty string or a string made of uppercase chars only.
        $uppercase_only_pattern = '^[[:upper:]]*$';

        // Without mbstring, fall back to the library's own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $uppercase_only_pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($uppercase_only_pattern, $str);
    }
4115
4116
    /**
4117
     * Check if the string is UTF-16.
4118
     *
4119
     * EXAMPLE: <code>
4120
     * UTF8::is_utf16(file_get_contents('utf-16-le.txt')); // 1
4121
     * //
4122
     * UTF8::is_utf16(file_get_contents('utf-16-be.txt')); // 2
4123
     * //
4124
     * UTF8::is_utf16(file_get_contents('utf-8.txt')); // false
4125
     * </code>
4126
     *
4127
     * @param string $str                       <p>The input string.</p>
4128
     * @param bool   $check_if_string_is_binary
4129
     *
4130
     * @psalm-pure
4131
     *
4132
     * @return false|int
4133
     *                   <strong>false</strong> if it's not UTF-16,<br>
4134
     *                   <strong>1</strong> for UTF-16LE,<br>
4135
     *                   <strong>2</strong> for UTF-16BE
4136
     */
4137 22
    public static function is_utf16($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        // Optional pre-check: text that does not look binary is not UTF-16.
        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // The same heuristic is applied for both byte orders (little endian
        // first, then big endian), so it is written once and looped instead
        // of duplicated.
        $hits = ['UTF-16LE' => 0, 'UTF-16BE' => 0];
        foreach (['UTF-16LE', 'UTF-16BE'] as $encoding) {
            $as_utf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$as_utf8) {
                continue;
            }

            // Only a conversion that survives a full round trip is counted.
            $back_in_encoding = \mb_convert_encoding($as_utf8, $encoding, 'UTF-8');
            $round_trip = \mb_convert_encoding($back_in_encoding, 'UTF-8', $encoding);
            if ($round_trip !== $as_utf8) {
                continue;
            }

            // Computed lazily and shared between both byte orders.
            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // Iterate by value: the result of a function call is a temporary,
            // so iterating it by reference (as before) had no purpose.
            // NOTE(review): the keys of count_chars($round_trip) are looked up
            // among the *values* of $str_chars - presumably an intentional
            // heuristic; confirm against count_chars().
            foreach (self::count_chars($round_trip) as $char => $unused_count) {
                if (\in_array($char, $str_chars, true)) {
                    ++$hits[$encoding];
                }
            }
        }

        // A tie (including "no hits at all") means: not detected as UTF-16.
        if ($hits['UTF-16BE'] === $hits['UTF-16LE']) {
            return false;
        }

        // 1 = UTF-16LE, 2 = UTF-16BE
        return $hits['UTF-16LE'] > $hits['UTF-16BE'] ? 1 : 2;
    }
4209
4210
    /**
4211
     * Check if the string is UTF-32.
4212
     *
4213
     * EXAMPLE: <code>
4214
     * UTF8::is_utf32(file_get_contents('utf-32-le.txt')); // 1
4215
     * //
4216
     * UTF8::is_utf32(file_get_contents('utf-32-be.txt')); // 2
4217
     * //
4218
     * UTF8::is_utf32(file_get_contents('utf-8.txt')); // false
4219
     * </code>
4220
     *
4221
     * @param string $str                       <p>The input string.</p>
4222
     * @param bool   $check_if_string_is_binary
4223
     *
4224
     * @psalm-pure
4225
     *
4226
     * @return false|int
4227
     *                   <strong>false</strong> if it's not UTF-32,<br>
4228
     *                   <strong>1</strong> for UTF-32LE,<br>
4229
     *                   <strong>2</strong> for UTF-32BE
4230
     */
4231 20
    public static function is_utf32($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        // Optional pre-check: text that does not look binary is not UTF-32.
        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // The same heuristic is applied for both byte orders (little endian
        // first, then big endian), so it is written once and looped instead
        // of duplicated.
        $hits = ['UTF-32LE' => 0, 'UTF-32BE' => 0];
        foreach (['UTF-32LE', 'UTF-32BE'] as $encoding) {
            $as_utf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$as_utf8) {
                continue;
            }

            // Only a conversion that survives a full round trip is counted.
            $back_in_encoding = \mb_convert_encoding($as_utf8, $encoding, 'UTF-8');
            $round_trip = \mb_convert_encoding($back_in_encoding, 'UTF-8', $encoding);
            if ($round_trip !== $as_utf8) {
                continue;
            }

            // Computed lazily and shared between both byte orders.
            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // Iterate by value: the result of a function call is a temporary,
            // so iterating it by reference (as before) had no purpose.
            // NOTE(review): the keys of count_chars($round_trip) are looked up
            // among the *values* of $str_chars - presumably an intentional
            // heuristic; confirm against count_chars().
            foreach (self::count_chars($round_trip) as $char => $unused_count) {
                if (\in_array($char, $str_chars, true)) {
                    ++$hits[$encoding];
                }
            }
        }

        // A tie (including "no hits at all") means: not detected as UTF-32.
        if ($hits['UTF-32BE'] === $hits['UTF-32LE']) {
            return false;
        }

        // 1 = UTF-32LE, 2 = UTF-32BE
        return $hits['UTF-32LE'] > $hits['UTF-32BE'] ? 1 : 2;
    }
4303
4304
    /**
4305
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
4306
     *
4307
     * EXAMPLE: <code>
4308
     * UTF8::is_utf8(['Iñtërnâtiônàlizætiøn', 'foo']); // true
4309
     * //
4310
     * UTF8::is_utf8(["Iñtërnâtiônàlizætiøn\xA0\xA1", 'bar']); // false
4311
     * </code>
4312
     *
4313
     * @param int|string|string[]|null $str    <p>The input to be checked.</p>
4314
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
4315
     *
4316
     * @psalm-pure
4317
     *
4318
     * @return bool
4319
     */
4320 83
    public static function is_utf8($str, bool $strict = false): bool
    {
        // Scalar input (or null): cast to string and check it directly.
        if (!\is_array($str)) {
            return self::is_utf8_string((string) $str, $strict);
        }

        // Array input: every element has to be valid, checked recursively.
        // The elements are only read, so they are iterated by value; the
        // previous by-reference loop left a reference without unset().
        foreach ($str as $item) {
            if (!self::is_utf8($item, $strict)) {
                return false;
            }
        }

        // No invalid element found (an empty array is valid as well).
        return true;
    }
4334
4335
    /**
4336
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
4337
     * Decodes a JSON string
4338
     *
4339
     * EXAMPLE: <code>UTF8::json_decode('[1,"\u00a5","\u00e4"]'); // array(1, '¥', 'ä')</code>
4340
     *
4341
     * @see http://php.net/manual/en/function.json-decode.php
4342
     *
4343
     * @param string $json    <p>
4344
     *                        The <i>json</i> string being decoded.
4345
     *                        </p>
4346
     *                        <p>
4347
     *                        This function only works with UTF-8 encoded strings.
4348
     *                        </p>
4349
     *                        <p>PHP implements a superset of
4350
     *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
4351
     *                        only supports these values when they are nested inside an array or an object.
4352
     *                        </p>
4353
     * @param bool   $assoc   [optional] <p>
4354
     *                        When <b>TRUE</b>, returned objects will be converted into
4355
     *                        associative arrays.
4356
     *                        </p>
4357
     * @param int    $depth   [optional] <p>
4358
     *                        User specified recursion depth.
4359
     *                        </p>
4360
     * @param int    $options [optional] <p>
4361
     *                        Bitmask of JSON decode options. Currently only
4362
     *                        <b>JSON_BIGINT_AS_STRING</b>
4363
     *                        is supported (default is to cast large integers as floats)
4364
     *                        </p>
4365
     *
4366
     * @psalm-pure
4367
     *
4368
     * @return mixed
4369
     *               <p>The value encoded in <i>json</i> in appropriate PHP type. Values true, false and
4370
     *               null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b> and <b>NULL</b> respectively.
4371
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
4372
     *               is deeper than the recursion limit.</p>
4373
     */
4374 43
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        // The input is passed through the class' filter() before decoding.
        $filtered_json = self::filter($json);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        // Hand over to the native decoder with the caller's settings.
        /** @noinspection PhpComposerExtensionStubsInspection */
        $decoded = \json_decode($filtered_json, $assoc, $depth, $options);

        return $decoded;
    }
4389
4390
    /**
4391
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
4392
     * Returns the JSON representation of a value.
4393
     *
4394
     * EXAMPLE: <code>UTF8::json_encode(array(1, '¥', 'ä')); // '[1,"\u00a5","\u00e4"]'</code>
4395
     *
4396
     * @see http://php.net/manual/en/function.json-encode.php
4397
     *
4398
     * @param mixed $value   <p>
4399
     *                       The <i>value</i> being encoded. Can be any type except
4400
     *                       a resource.
4401
     *                       </p>
4402
     *                       <p>
4403
     *                       All string data must be UTF-8 encoded.
4404
     *                       </p>
4405
     *                       <p>PHP implements a superset of
4406
     *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
4407
     *                       only supports these values when they are nested inside an array or an object.
4408
     *                       </p>
4409
     * @param int   $options [optional] <p>
4410
     *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
4411
     *                       <b>JSON_HEX_TAG</b>,
4412
     *                       <b>JSON_HEX_AMP</b>,
4413
     *                       <b>JSON_HEX_APOS</b>,
4414
     *                       <b>JSON_NUMERIC_CHECK</b>,
4415
     *                       <b>JSON_PRETTY_PRINT</b>,
4416
     *                       <b>JSON_UNESCAPED_SLASHES</b>,
4417
     *                       <b>JSON_FORCE_OBJECT</b>,
4418
     *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
4419
     *                       constants is described on
4420
     *                       the JSON constants page.
4421
     *                       </p>
4422
     * @param int   $depth   [optional] <p>
4423
     *                       Set the maximum depth. Must be greater than zero.
4424
     *                       </p>
4425
     *
4426
     * @psalm-pure
4427
     *
4428
     * @return false|string
4429
     *                      A JSON encoded <strong>string</strong> on success or<br>
4430
     *                      <strong>FALSE</strong> on failure
4431
     */
4432 5
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        // The value is passed through the class' filter() before encoding.
        $filtered_value = self::filter($value);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        // Hand over to the native encoder with the caller's settings.
        /** @noinspection PhpComposerExtensionStubsInspection */
        $encoded = \json_encode($filtered_value, $options, $depth);

        return $encoded;
    }
4443
4444
    /**
4445
     * Checks whether JSON is available on the server.
4446
     *
4447
     * @psalm-pure
4448
     *
4449
     * @return bool
4450
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
4451
     *
4452
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
4453
     */
4454
    public static function json_loaded(): bool
    {
        // JSON support is detected via the presence of the native decoder.
        $decoder_is_available = \function_exists('json_decode');

        return $decoder_is_available;
    }
4458
4459
    /**
     * Lower-cases the first character of a string and leaves the rest untouched.
     *
     * EXAMPLE: <code>UTF8::lcfirst('ÑTËRNÂTIÔNÀLIZÆTIØN'); // ñTËRNÂTIÔNÀLIZÆTIØN</code>
     *
     * @param string      $str                           <p>The input string</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // Any encoding other than the literal "UTF-8" goes through the
        // encoding-aware helpers of this class.
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $tail = (string) self::substr($str, 1, null, $encoding);
            $head = self::strtolower(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );

            return $head . $tail;
        }

        // UTF-8: split the string into its first character and the remainder.
        $first_char = (string) \mb_substr($str, 0, 1);
        $tail = (string) \mb_substr($str, 1);

        // The plain mbstring call is only used when no language specific rule and
        // no "keep the length" handling was requested.
        if ($lang === null && !$try_to_keep_the_string_length) {
            $head = \mb_strtolower($first_char);
        } else {
            $head = self::strtolower(
                $first_char,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );
        }

        return $head . $tail;
    }
4521
4522
    /**
     * Deprecated alias: forwards all arguments unchanged to "UTF8::lcfirst()".
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::lcfirst()
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $result = self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
4553
4554
    /**
     * Lower-cases the first character of every word in the string.
     *
     * The string is split via "UTF8::str_to_words()"; every non-empty part that is
     * not listed in $exceptions is passed through "UTF8::lcfirst()", and the parts
     * are concatenated again in their original order.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string[]    $exceptions                    [optional] <p>Words that must be left untouched
     *                                                   (compared strictly).</p>
     * @param string      $char_list                     [optional] <p>Additional chars that belong to words and do
     *                                                   not start a new word.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Compare with '' explicitly: a loose "!$str" is also true for the
        // string "0", which would wrongly be turned into an empty result.
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        // Iterate by value: the parts are only read, so a reference (which would
        // stay bound to the last element after the loop) is not needed.
        foreach ($words as $word) {
            // Only skip truly empty parts; a part consisting of "0" must be kept.
            if ($word === '') {
                continue;
            }

            if (
                !$use_exceptions
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words_str .= self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
            } else {
                $words_str .= $word;
            }
        }

        return $words_str;
    }
4607
4608
    /**
     * Deprecated alias: forwards all arguments unchanged to "UTF8::lcfirst()".
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::lcfirst()
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $result = self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
4639
4640
    /**
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
     *
     * EXAMPLE: <code>UTF8::ltrim(' 中文空白  '); // '中文空白  '</code>
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped. <strong>null</strong> strips whitespace,
     *                           an empty string strips nothing.</p>
     *
     * @psalm-pure
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty character list means "strip nothing". Without this guard the
        // pattern below would become "^[]+", which is not a valid character class;
        // the failed replacement would then be cast to an empty string and the
        // whole input would be lost.
        if ($chars === '') {
            return $str;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                // mb_ereg patterns have no delimiter, so none is passed to preg_quote().
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                // "{$var}" instead of "${var}": the latter is deprecated as of PHP 8.2.
                $pattern = "^[{$chars}]+";
            } else {
                $pattern = '^[\\s]+';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // Fallback without mbstring: the pattern is handed to regex_replace(),
        // hence "/" is escaped as well.
        if ($chars !== null) {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+";
        } else {
            $pattern = '^[\\s]+';
        }

        return self::regex_replace($str, $pattern, '');
    }
4680
4681
    /**
     * Returns the UTF-8 character with the highest code point in the given data.
     *
     * EXAMPLE: <code>UTF8::max('abc-äöü-中文空白'); // '空'</code>
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings
     *                                  (an array is concatenated before it is inspected).</p>
     *
     * @psalm-pure
     *
     * @return string|null the character with the highest code point, or null if no code points were found
     */
    public static function max($arg)
    {
        $input = \is_array($arg) ? \implode('', $arg) : $arg;

        $code_points = self::codepoints($input);

        return $code_points === []
            ? null
            : self::chr((int) \max($code_points));
    }
4707
4708
    /**
     * Determines the largest number of bytes used by any single
     * UTF-8 encoded character of the given string.
     *
     * EXAMPLE: <code>UTF8::max_chr_width('Intërnâtiônàlizætiøn'); // 2</code>
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Max byte length of the given chars, 0 if "UTF8::chr_size_list()" returns no sizes.</p>
     */
    public static function max_chr_width(string $str): int
    {
        $byte_sizes = self::chr_size_list($str);

        return $byte_sizes === [] ? 0 : (int) \max($byte_sizes);
    }
4730
4731
    /**
     * Tells whether the "mbstring" extension is loaded on this server.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function mbstring_loaded(): bool
    {
        $mbstring_is_loaded = \extension_loaded('mbstring');

        return $mbstring_is_loaded;
    }
4745
4746
    /**
     * Returns the UTF-8 character with the lowest code point in the given data.
     *
     * EXAMPLE: <code>UTF8::min('abc-äöü-中文空白'); // '-'</code>
     *
     * @param string|string[] $arg <p>A UTF-8 encoded string or an array of such strings
     *                             (an array is concatenated before it is inspected).</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>The character with the lowest code point, or null if no code points were found.</p>
     */
    public static function min($arg)
    {
        $input = \is_array($arg) ? \implode('', $arg) : $arg;

        $code_points = self::codepoints($input);

        return $code_points === []
            ? null
            : self::chr((int) \min($code_points));
    }
4773
4774
    /**
     * Deprecated alias: forwards both arguments unchanged to "UTF8::normalize_encoding()".
     *
     * @param mixed $encoding
     * @param mixed $fallback
     *
     * @psalm-pure
     *
     * @return mixed
     *
     * @see        UTF8::normalize_encoding()
     * @deprecated <p>please use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4791
4792
    /**
     * Normalize the encoding-"name" input.
     *
     * Resolution order: empty input -> $fallback; a handful of hard-wired names;
     * the per-request cache; the list of known encodings (returned as given);
     * finally an alias table that is looked up with the upper-cased name reduced
     * to letters and digits. If no alias matches, the upper-cased name is returned.
     *
     * EXAMPLE: <code>UTF8::normalize_encoding('UTF8'); // 'UTF-8'</code>
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @psalm-pure
     *
     * @return mixed|string
     *                      <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return an empty string as fallback (by default)</p>
     *
     * @template TNormalizeEncodingFallback
     * @phpstan-param string|TNormalizeEncodingFallback $fallback
     * @phpstan-return string|TNormalizeEncodingFallback
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        // init
        $encoding = (string) $encoding;

        // Note: this is also true for the string "0".
        if (!$encoding) {
            return $fallback;
        }

        // Hard-wired names that are answered without touching cache or data files.
        $hard_wired = [
            'UTF-8'         => 'UTF-8',
            'UTF8'          => 'UTF-8',
            '8BIT'          => 'CP850',
            'BINARY'        => 'CP850',
            'HTML'          => 'HTML-ENTITIES',
            'HTML-ENTITIES' => 'HTML-ENTITIES',
            'ISO'           => 'ISO-8859-1',
            'ISO-8859-1'    => 'ISO-8859-1',
        ];
        if (isset($hard_wired[$encoding])) {
            return $hard_wired[$encoding];
        }

        // only a fallback, for non "strict_types" usage ...
        if ($encoding === '1' || $encoding === '0') {
            return $fallback;
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        // Lazy-load the list of known encoding names.
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // A name that is already known is returned exactly as given.
        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        $encoding_upper = \strtoupper($encoding);
        // Lookup key for the alias table: only letters and digits are kept.
        $alias_key = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $encoding_upper);

        $equivalences = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // Every alias maps to a non-empty string, so "??" is sufficient here.
        $normalized = $equivalences[$alias_key] ?? $encoding_upper;

        // The cache is keyed by the name exactly as it was passed in.
        $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $normalized;

        return $normalized;
    }
4957
4958
    /**
     * Replace every line ending ("\r\n", "\r" or "\n") with the given replacer.
     *
     * For a string replacer the input is processed in one single pass, so a
     * replacer that itself contains "\r" or "\n" (e.g. "\r\n") is never replaced again.
     *
     * EXAMPLE: <code>UTF8::normalize_line_ending("a\r\nb\rc", "\r\n"); // "a\r\nb\r\nc"</code>
     *
     * @param string          $str      <p>The input string.</p>
     * @param string|string[] $replacer <p>The replacer char e.g. "\n" (Linux) or "\r\n" (Windows). You can also use \PHP_EOL
     *                                  here. An array is passed on to "str_replace()" unchanged, i.e. its elements are
     *                                  paired with "\r\n", "\r" and "\n" in that order.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized line ending.</p>
     */
    public static function normalize_line_ending(string $str, $replacer = "\n"): string
    {
        // Array replacers keep the previous pair-wise "str_replace()" behaviour.
        if (\is_array($replacer)) {
            return \str_replace(["\r\n", "\r", "\n"], $replacer, $str);
        }

        $replacer = (string) $replacer;

        // strtr() with a map prefers the longest key ("\r\n" before "\r") and does
        // not re-scan text it has already replaced. A sequential str_replace()
        // would turn "\r\n" into "\r\r\n\r\n" when the replacer is "\r\n".
        return \strtr(
            $str,
            [
                "\r\n" => $replacer,
                "\r"   => $replacer,
                "\n"   => $replacer,
            ]
        );
    }
4974
4975
    /**
     * Normalize some MS Word special characters.
     *
     * Thin wrapper: the work is delegated to "ASCII::normalize_msword()".
     *
     * EXAMPLE: <code>UTF8::normalize_msword('„Abcdef…”'); // '"Abcdef..."'</code>
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized characters for commonly used chars in Word documents.</p>
     */
    public static function normalize_msword(string $str): string
    {
        $normalized = ASCII::normalize_msword($str);

        return $normalized;
    }
4991
4992
    /**
     * Normalize the whitespace.
     *
     * Thin wrapper: all arguments are handed unchanged to "ASCII::normalize_whitespace()".
     *
     * EXAMPLE: <code>UTF8::normalize_whitespace("abc-\xc2\xa0-öäü-\xe2\x80\xaf-\xE2\x80\xAC", true); // "abc-\xc2\xa0-öäü- -"</code>
     *
     * @param string $str                          <p>The string to be normalized.</p>
     * @param bool   $keep_non_breaking_space      [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keep_bidi_unicode_controls   [optional] <p>Set to true, to keep non-printable (for the web)
     *                                             bidirectional text chars.</p>
     * @param bool   $normalize_control_characters [optional] <p>Set to true, to convert e.g. LINE-, PARAGRAPH-SEPARATOR with "\n" and LINE TABULATION with "\t".</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized whitespace.</p>
     */
    public static function normalize_whitespace(
        string $str,
        bool $keep_non_breaking_space = false,
        bool $keep_bidi_unicode_controls = false,
        bool $normalize_control_characters = false
    ): string {
        $normalized = ASCII::normalize_whitespace(
            $str,
            $keep_non_breaking_space,
            $keep_bidi_unicode_controls,
            $normalize_control_characters
        );

        return $normalized;
    }
5021
5022
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * Lookup order: per-request cache, the pre-computed "ord" data table,
     * "IntlChar::ord()" (if available) and finally a manual decode of up to
     * four leading bytes.
     *
     * INFO: opposite to UTF8::chr()
     *
     * EXAMPLE: <code>UTF8::ord('☃'); // 0x2603</code>
     *
     * @param string $chr      <p>The character of which to calculate code point.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Unicode code point of the given character,<br>
     *             the byte-level fallback yields 0 for an empty input</p>
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,int>
         */
        static $CHAR_CACHE = [];

        // init
        $chr = (string) $chr;

        // "UTF-8" and "CP850" are used as given, every other name is normalized first.
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // The cache key is built from the raw input, before any re-encoding below.
        $cache_key = $chr . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        // Lazy-load the pre-computed character => code point table.
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            $code_point = self::$ORD[$chr];
            $CHAR_CACHE[$cache_key] = $code_point;

            return $code_point;
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $code_point = \IntlChar::ord($chr);
            // A falsy result (null or 0) falls through to the byte-level decode below.
            if ($code_point) {
                $CHAR_CACHE[$cache_key] = $code_point;

                return $code_point;
            }
        }

        //
        // fallback via vanilla php
        //

        // 1-indexed list of the first (up to) four bytes.
        /** @var int[] $bytes - "unpack": only false if the format string contains errors */
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        $lead_byte = $bytes ? $bytes[1] : 0;

        if ($lead_byte >= 0xF0 && isset($bytes[4])) {
            // four-byte sequence
            $code_point = (($lead_byte - 0xF0) << 18)
                          + (($bytes[2] - 0x80) << 12)
                          + (($bytes[3] - 0x80) << 6)
                          + $bytes[4] - 0x80;
        } elseif ($lead_byte >= 0xE0 && isset($bytes[3])) {
            // three-byte sequence
            $code_point = (($lead_byte - 0xE0) << 12)
                          + (($bytes[2] - 0x80) << 6)
                          + $bytes[3] - 0x80;
        } elseif ($lead_byte >= 0xC0 && isset($bytes[2])) {
            // two-byte sequence
            $code_point = (($lead_byte - 0xC0) << 6) + $bytes[2] - 0x80;
        } else {
            // single byte (or nothing at all)
            $code_point = $lead_byte;
        }

        $CHAR_CACHE[$cache_key] = $code_point;

        return $code_point;
    }
5109
5110
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
     *          if the second parameter is not set!
     *
     * EXAMPLE: <code>
     * UTF8::parse_str('Iñtërnâtiônéàlizætiøn=測試&arr[]=foo+測試&arr[]=ການທົດສອບ', $array);
     * echo $array['Iñtërnâtiônéàlizætiøn']; // '測試'
     * </code>
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $result     <p>The result will be returned into this reference parameter.</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
     */
    public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
    {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // Without mbstring: use the native parser and report whether anything was parsed.
        if (self::$SUPPORT['mbstring'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - we use the second parameter, so we don't change variables by magic
             */
            \parse_str($str, $result);

            return $result !== [];
        }

        // With mbstring: the call must not fail and must have produced a result.
        $parsed = \mb_parse_str($str, $result);

        return $parsed !== false && $result !== [];
    }
5151
5152
    /**
     * Checks if the "u" pattern modifier, which enables Unicode support in PCRE, is available.
     *
     * The check runs an empty pattern with the "u" modifier against an empty
     * string; warnings are silenced on purpose.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function pcre_utf8_support(): bool
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $match_result = @\preg_match('//u', '');

        return (bool) $match_result;
    }
5168
5169
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * EXAMPLE: <code>UTF8::range('κ', 'ζ'); // array('κ', 'ι', 'θ', 'η', 'ζ',)</code>
     *
     * @param int|string $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param int|string $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     * @param bool       $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple
     *                              "is_numeric"</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param float|int  $step      [optional] <p>
     *                              If a step value is given, it will be used as the
     *                              increment between elements in the sequence. step
     *                              should be given as a positive number. If not specified,
     *                              step will default to 1.
     *                              </p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if $step is not numeric or not positive
     * @throws \RuntimeException         if $use_ctype is true but "ext-ctype" is not available
     *
     * @return string[]
     *                  <p>The characters of the range; an empty array if a boundary is falsy or resolves to 0.</p>
     */
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            /**
             * @psalm-suppress RedundantConditionGivenDocblockType
             * @psalm-suppress DocblockTypeContradiction
             */
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            /**
             * @psalm-suppress RedundantConditionGivenDocblockType - false-positive from psalm?
             */
            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        $is_digit = false;
        $is_xdigit = false;

        // The ctype functions are always fed strings: an integer argument is
        // deprecated as of PHP 8.1 and small integers are interpreted as ASCII
        // codes, which made the hexadecimal detection depend on the argument type.
        /** @noinspection PhpComposerExtensionStubsInspection */
        if ($use_ctype && \ctype_digit((string) $var1) && \ctype_digit((string) $var2)) {
            $is_digit = true;
            $start = (int) $var1;
        } /** @noinspection PhpComposerExtensionStubsInspection */ elseif ($use_ctype && \ctype_xdigit((string) $var1) && \ctype_xdigit((string) $var2)) {
            $is_xdigit = true;
            $start = (int) self::hex_to_int((string) $var1);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            // Anything else is treated as a character.
            $start = self::ord((string) $var1);
        }

        if (!$start) {
            return [];
        }

        // The end boundary is interpreted the same way as the start boundary.
        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int((string) $var2);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord((string) $var2);
        }

        if (!$end) {
            return [];
        }

        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
5263
5264
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * EXAMPLE: <code>UTF8::rawurldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest+test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible (until the string no longer changes).</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The decoded URL, as a string.</p>
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::urldecode_unicode_helper($str);

        // One decode pass is always executed; further passes only run in
        // multi-decode mode and only while a pass still changed the string.
        do {
            $before_pass = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = \rawurldecode(
                self::html_entity_decode(
                    self::to_utf8($str),
                    \ENT_QUOTES | \ENT_HTML5
                )
            );
        } while ($multi_decode && $before_pass !== $str);

        return self::fix_simple_utf8($str);
    }
5324
5325
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is always compiled with the "u" (UTF-8) modifier; $options are
     * appended after it.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
     * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // "msr" is mapped onto the plain "ms" modifiers
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback: a falsy delimiter (e.g. '') is replaced by '/'
        $boundary = $delimiter ?: '/';

        $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
5360
5361
    /**
     * alias for "UTF8::remove_bom()"
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Whatever "UTF8::remove_bom()" returns for $str.</p>
     *
     * @see        UTF8::remove_bom()
     * @deprecated <p>please use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        // pure delegation, kept only for backward compatibility
        $without_bom = self::remove_bom($str);

        return $without_bom;
    }
5377
5378
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * EXAMPLE: <code>UTF8::remove_bom("\xEF\xBB\xBFΜπορώ να"); // 'Μπορώ να'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without UTF-BOM.</p>
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remaining_bytes = \strlen($str);

        // every entry of self::$BOM maps the BOM bytes to their byte length
        foreach (self::$BOM as $bom_bytes => $bom_size) {
            if (\strncmp($str, $bom_bytes, $bom_size) !== 0) {
                // the string does not start with this BOM
                continue;
            }

            /** @var false|string $tail - needed for PhpStan (stubs error) */
            $tail = \substr($str, $bom_size, $remaining_bytes);
            if ($tail === false) {
                return '';
            }

            $remaining_bytes -= $bom_size;
            $str = (string) $tail;
        }

        return $str;
    }
5413
5414
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Every run of directly repeated $what is collapsed into a single occurrence.
     *
     * EXAMPLE: <code>UTF8::remove_duplicates('öäü-κόσμεκόσμε-äöü', 'κόσμε'); // 'öäü-κόσμε-äöü'</code>
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with removed duplicates.</p>
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what)) {
            $what = [$what];
        }

        /**
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_array($what)) {
            // anything that is neither a string nor an array is ignored
            return $str;
        }

        foreach ($what as $item) {
            if ($item === '') {
                // an empty needle cannot be duplicated
                continue;
            }

            // Replace with the captured group instead of the raw needle: used as a
            // replacement string, a needle like "$0" or "\1" would be interpreted
            // as a back-reference and the duplicates would not be collapsed.
            $str = (string) \preg_replace('/(' . \preg_quote($item, '/') . ')+/u', '$1', $str);
        }

        return $str;
    }
5444
5445
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $allowable_tags [optional] <p>Tags which should not be stripped, in the format
     *                               expected by "strip_tags()" (e.g. "<b><i>"). Default: '' (empty string)
     *                               </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without html tags.</p>
     */
    public static function remove_html(string $str, string $allowable_tags = ''): string
    {
        $stripped = \strip_tags($str, $allowable_tags);

        return $stripped;
    }
5462
5463
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * A "\r\n" pair counts as one break, so it is replaced by a single $replacement.
     * Only real "<br>" tags (any case, with or without attributes / a closing slash)
     * are matched, not other tags that merely start with "br".
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without breaks.</p>
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // - "\r\n" is listed first so that a CRLF is consumed as one match
        // - "\b" keeps tags like "<bread>" untouched
        // - "[^>]*>" stops at the first ">" of the tag
        return (string) \preg_replace('#\r\n|\r|\n|<br\b[^>]*>#i', $replacement, $str);
    }
5478
5479
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * EXAMPLE: <code>UTF8::remove_invisible_characters("κόσ\0με"); // 'κόσμε'</code>
     *
     * copy&past from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * @param string $str                           <p>The input string.</p>
     * @param bool   $url_encoded                   [optional] <p>
     *                                              Try to remove url encoded control character.
     *                                              WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
     *                                              <br>
     *                                              Default: false
     *                                              </p>
     * @param string $replacement                   [optional] <p>The replacement character.</p>
     * @param bool   $keep_basic_control_characters [optional] <p>Keep control characters like [LRM] or [LSEP].</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without invisible chars.</p>
     */
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = false,
        string $replacement = '',
        bool $keep_basic_control_characters = true
    ): string {
        // the actual work is done by the ASCII helper class, all arguments are passed through
        $visible_only = ASCII::remove_invisible_characters(
            $str,
            $url_encoded,
            $replacement,
            $keep_basic_control_characters
        );

        return $visible_only;
    }
5516
5517
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * The prefix test is a byte-wise comparison; only the removal itself is done
     * with the character-aware substring functions.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without the prefix $substring.</p>
     */
    public static function remove_left(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // - compare against '' explicitly: a truthiness check would ignore the prefix "0"
        // - strncmp() only looks at the first bytes instead of scanning the whole string
        if (
            $substring === ''
            ||
            \strncmp($str, $substring, \strlen($substring)) !== 0
        ) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
5558
5559
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * The suffix test is a byte-wise comparison; only the removal itself is done
     * with the character-aware substring functions.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string having a $str without the suffix $substring.</p>
     */
    public static function remove_right(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // compare against '' explicitly: a truthiness check would ignore the suffix "0"
        if (
            $substring === ''
            ||
            \substr($str, -\strlen($substring)) !== $substring
        ) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
5597
5598
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $search         <p>The needle to search for.</p>
     * @param string $replacement    <p>The string to replace with.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $case_sensitive = true
    ): string {
        // native "str_replace()" for the case-sensitive variant,
        // "UTF8::str_ireplace()" for the case-insensitive one
        return $case_sensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
5623
5624
    /**
     * Replaces all occurrences of the $search elements in $str by $replacement.
     *
     * @param string       $str            <p>The input string.</p>
     * @param array        $search         <p>The elements to search for.</p>
     * @param array|string $replacement    <p>The string (or the strings) to replace with.</p>
     * @param bool         $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $case_sensitive = true
    ): string {
        if (!$case_sensitive) {
            // case-insensitive variant
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
5649
5650
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * EXAMPLE: <code>UTF8::replace_diamond_question_mark('中文空白�', ''); // '中文空白'</code>
     *
     * @param string $str                        <p>The input string</p>
     * @param string $replacement_char           <p>The replacement, it may be a multi-byte character.</p>
     * @param bool   $process_invalid_utf8_chars <p>Also replace invalid UTF-8 byte sequences.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without diamond question marks (�).</p>
     */
    public static function replace_diamond_question_mark(
        string $str,
        string $replacement_char = '',
        bool $process_invalid_utf8_chars = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($process_invalid_utf8_chars) {
            // - empty replacement: "none" lets mbstring drop the invalid sequences
            // - otherwise: mark the invalid sequences with U+FFFD, so that the final
            //   "str_replace()" inserts the complete replacement. (Passing
            //   "\ord($replacement_char)" would only use the first byte and therefore
            //   substitute a wrong character for every multi-byte replacement.)
            $substitute_character = $replacement_char === '' ? 'none' : 0xFFFD;

            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - we will reset the value in the next step
             */
            $save = \mb_substitute_character();
            /** @noinspection PhpUsageOfSilenceOperatorInspection - a polyfill may not support every value */
            @\mb_substitute_character($substitute_character);
            // the polyfill maybe return false, so cast to string
            $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            \mb_substitute_character($save);
        }

        // "\xEF\xBF\xBD" is the UTF-8 encoding of U+FFFD (the diamond question mark)
        return \str_replace("\xEF\xBF\xBD", $replacement_char, $str);
    }
5709
5710
    /**
     * Strip whitespace or other characters from the end of a UTF-8 string.
     *
     * EXAMPLE: <code>UTF8::rtrim('-ABC-中文空白-  '); // '-ABC-中文空白-'</code>
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped, null strips whitespace
     *                           and an empty string strips nothing.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with unwanted characters stripped from the right.</p>
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        if ($chars === '') {
            // nothing to strip: "[]+$" would be an invalid character class
            // and the failing regex call would wipe the whole string
            return $str;
        }

        $use_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($chars !== null) {
            // "mb_ereg_replace()" patterns have no delimiter, the PCRE fallback uses "/"
            /** @noinspection PregQuoteUsageInspection */
            $chars = $use_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');
            // "{$chars}" instead of "${chars}": the latter is deprecated since PHP 8.2
            $pattern = "[{$chars}]+$";
        } else {
            $pattern = '[\\s]+$';
        }

        if ($use_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '');
    }
5751
5752
    /**
     * WARNING: Print native UTF-8 support (libs) by default, e.g. for debugging.
     *
     * Builds a "<pre>" block with one "key - value" line per entry of the internal
     * support array. The markup is always returned, and additionally echoed if
     * $useEcho is true.
     *
     * @param bool $useEcho <p>Set to false to only return the markup without printing it.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The generated html.</p>
     */
    public static function showSupport(bool $useEcho = true)
    {
        $html = '<pre>';

        // iterate by value: the entries are only read, and a by-reference loop would
        // turn the elements of the static array into references
        foreach (self::$SUPPORT as $key => $value) {
            $html .= $key . ' - ' . \print_r($value, true) . "\n<br>";
        }

        $html .= '</pre>';

        if ($useEcho) {
            echo $html;
        }

        return $html;
    }
5779
5780
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * EXAMPLE: <code>UTF8::single_chr_html_encode('κ'); // '&#954;'</code>
     *
     * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The HTML numbered entity for the given character.</p>
     */
    public static function single_chr_html_encode(
        string $char,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($char === '') {
            return '';
        }

        // ASCII input is passed through untouched, but only on request
        $is_kept_ascii = $keep_ascii_chars && ASCII::is_ascii($char);
        if ($is_kept_ascii) {
            return $char;
        }

        $code_point = self::ord($char, $encoding);

        return '&#' . $code_point . ';';
    }
5813
5814
    /**
     * Replaces every run of $tab_length spaces with one tab character.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length <p>Number of spaces which are replaced by one tab. Default: 4</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tab_length = 4): string
    {
        // the run of spaces that stands for one tab
        $spaces = \str_repeat(' ', $tab_length);

        return \str_replace($spaces, "\t", $str);
    }
5834
5835
    /**
     * alias for "UTF8::str_split()"
     *
     * @param int|string $str
     * @param int        $length
     * @param bool       $clean_utf8
     *
     * @psalm-pure
     *
     * @return string[]
     *
     * @see        UTF8::str_split()
     * @deprecated <p>please use "UTF8::str_split()"</p>
     */
    public static function split(
        $str,
        int $length = 1,
        bool $clean_utf8 = false
    ): array {
        // pure delegation, kept only for backward compatibility
        /** @var string[] $chunks */
        $chunks = self::str_split($str, $length, $clean_utf8);

        return $chunks;
    }
5857
5858
    /**
     * alias for "UTF8::str_starts_with()"
     *
     * @param string $haystack
     * @param string $needle
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::str_starts_with()
     * @deprecated <p>please use "UTF8::str_starts_with()"</p>
     */
    public static function str_begins(string $haystack, string $needle): bool
    {
        // pure delegation, kept only for backward compatibility
        $starts_with_needle = self::str_starts_with($haystack, $needle);

        return $starts_with_needle;
    }
5875
5876
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // lower-case the first character of the trimmed input ...
        $str = self::lcfirst(\trim($str), $encoding, false, $lang, $try_to_keep_the_string_length);
        // ... and drop leading dashes / underscores
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // the plain "mb_strtoupper()" is only used if no language specific handling is requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        /**
         * Upper-case helper which is shared by both replace callbacks.
         *
         * @param string $chars
         * @param bool   $clean
         *
         * @psalm-pure
         *
         * @return string
         */
        $to_upper = static function (string $chars, bool $clean) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
            if (!$use_mb_functions) {
                return self::strtoupper($chars, $encoding, $clean, $lang, $try_to_keep_the_string_length);
            }

            return $encoding === 'UTF-8'
                ? \mb_strtoupper($chars)
                : \mb_strtoupper($chars, $encoding);
        };

        // remove the separators and upper-case the character right after them (if there is one)
        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper): string {
                return isset($match[1]) ? $to_upper($match[1], false) : '';
            },
            $str
        );

        // upper-case every run of numbers together with the character right after it
        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper, $clean_utf8): string {
                return $to_upper($match[0], $clean_utf8);
            },
            $str
        );
    }
5969
5970
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with $str capitalized.</p>
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);

        // first pass with the space as delimiter, second pass with the dash
        $space_separated = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($space_separated, '-');
    }
5991
5992
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $case_sensitive to false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param string $needle         <p>Substring to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains $needle.</p>
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $case_sensitive = true
    ): bool {
        if (!$case_sensitive) {
            return \mb_stripos($haystack, $needle) !== false;
        }

        if (\PHP_VERSION_ID < 80000) {
            // the native "str_contains()" does not exist before PHP 8
            return \strpos($haystack, $needle) !== false;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_contains($haystack, $needle);
    }
6022
6023
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * An empty $haystack, an empty $needles array or an empty needle always
     * results in false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains all $needles.</p>
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // an empty needle can never be "contained",
            // but "0" is a real needle although it is falsy in PHP
            if (!$needle && $needle !== '0') {
                return false;
            }

            // exactly one search per needle: the case-sensitive check must not
            // fall through into the case-insensitive one
            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position === false) {
                return false;
            }
        }

        return true;
    }
6062
6063
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * Empty needles are skipped; an empty $haystack or an empty $needles array
     * always results in false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains at least one of the $needles.</p>
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // skip empty needles, but "0" is a real needle although it is falsy in PHP
            if (!$needle && $needle !== '0') {
                continue;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
6107
6108
    /**
6109
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
6110
     * inserted before uppercase characters (with the exception of the first
6111
     * character of the string), and in place of spaces as well as underscores.
6112
     *
6113
     * @param string $str      <p>The input string.</p>
6114
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6115
     *
6116
     * @psalm-pure
6117
     *
6118
     * @return string
6119
     */
6120 19
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        // Dasherizing is delimiting with a hyphen as the separator.
        $dash = '-';

        return self::str_delimit($str, $dash, $encoding);
    }
6124
6125
    /**
6126
     * Returns a lowercase and trimmed string separated by the given delimiter.
6127
     * Delimiters are inserted before uppercase characters (with the exception
6128
     * of the first character of the string), and in place of spaces, dashes,
6129
     * and underscores. Alpha delimiters are not converted to lowercase.
6130
     *
6131
     * @param string      $str                           <p>The input string.</p>
6132
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
6133
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
6134
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
6135
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
6136
     *                                                   tr</p>
6137
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
6138
     *                                                   ß</p>
6139
     *
6140
     * @psalm-pure
6141
     *
6142
     * @return string
6143
     */
6144 49
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Pick the regex engine once: mbstring's "mb_ereg_*" if available, PCRE otherwise.
        $has_mbstring = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // Step 1: put a dash in front of every uppercase letter that is not at a word boundary.
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $marked = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
        } else {
            $marked = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
        }

        // Step 2: lowercase. The plain "mb_strtolower()" is only used for the default
        // case (UTF-8, no language rules, no length preservation).
        $needs_custom_lowercase = $lang !== null
                                  || $try_to_keep_the_string_length
                                  || $encoding !== 'UTF-8';
        if ($needs_custom_lowercase) {
            $lowered = self::strtolower($marked, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        } else {
            $lowered = \mb_strtolower($marked);
        }

        // Step 3: collapse every run of dashes, underscores and whitespace into the delimiter.
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $lowered);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $lowered);
    }
6178
6179
    /**
6180
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
6181
     *
6182
     * EXAMPLE: <code>
6183
     * UTF8::str_detect_encoding('中文空白'); // 'UTF-8'
6184
     * UTF8::str_detect_encoding('Abc'); // 'ASCII'
6185
     * </code>
6186
     *
6187
     * @param string $str <p>The input string.</p>
6188
     *
6189
     * @psalm-pure
6190
     *
6191
     * @return false|string
6192
     *                      <p>
6193
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
6194
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
6195
     *                      </p>
6196
     */
6197 31
    public static function str_detect_encoding($str)
    {
        // Work on a string copy of whatever was passed in.
        $input = (string) $str;

        //
        // 1.) binary-looking input: only UTF-32 / UTF-16 are recognised, anything else is "unknown"
        //
        if (self::is_binary($input, true)) {
            $utf32_variant = self::is_utf32($input, false);
            if ($utf32_variant === 1) {
                return 'UTF-32LE';
            }
            if ($utf32_variant === 2) {
                return 'UTF-32BE';
            }

            $utf16_variant = self::is_utf16($input, false);
            if ($utf16_variant === 1) {
                return 'UTF-16LE';
            }
            if ($utf16_variant === 2) {
                return 'UTF-16BE';
            }

            // binary, but neither "UTF-16" nor "UTF-32"
            return false;
        }

        //
        // 2.) pure ASCII
        //
        if (ASCII::is_ascii($input)) {
            return 'ASCII';
        }

        //
        // 3.) valid UTF-8
        //
        if (self::is_utf8_string($input)) {
            return 'UTF-8';
        }

        //
        // 4.) ask "mb_detect_encoding()" (strict mode) with a fixed candidate order
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"
        if (self::$SUPPORT['mbstring'] === true) {
            $candidates = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($input, $candidates, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) last resort: first known encoding for which an "iconv()" round-trip keeps the bytes unchanged
        //
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $round_trip = (string) @\iconv($candidate, $candidate . '//IGNORE', $input);
            if ($round_trip === $input) {
                return $candidate;
            }
        }

        return false;
    }
6309
6310
    /**
6311
     * alias for "UTF8::str_ends_with()"
6312
     *
6313
     * @param string $haystack
6314
     * @param string $needle
6315
     *
6316
     * @psalm-pure
6317
     *
6318
     * @return bool
6319
     *
6320
     * @see        UTF8::str_ends_with()
6321
     * @deprecated <p>please use "UTF8::str_ends_with()"</p>
6322
     */
6323 1
    public static function str_ends(string $haystack, string $needle): bool
    {
        // Deprecated alias: forward unchanged to the canonical method.
        $ends_with = self::str_ends_with($haystack, $needle);

        return $ends_with;
    }
6327
6328
    /**
6329
     * Check if the string ends with the given substring.
6330
     *
6331
     * EXAMPLE: <code>
6332
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
6333
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'κόσμε'); // false
6334
     * </code>
6335
     *
6336
     * @param string $haystack <p>The string to search in.</p>
6337
     * @param string $needle   <p>The substring to search for.</p>
6338
     *
6339
     * @psalm-pure
6340
     *
6341
     * @return bool
6342
     */
6343 9
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        // Every string ends with the empty string; an empty haystack
        // can only end with the empty string.
        if ($needle === '' || $haystack === '') {
            return $needle === '';
        }

        // Before PHP 8 there is no native helper: compare the trailing bytes by hand.
        if (\PHP_VERSION_ID < 80000) {
            $tail = \substr($haystack, -\strlen($needle));

            return $tail === $needle;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_ends_with($haystack, $needle);
    }
6360
6361
    /**
6362
     * Returns true if the string ends with any of $substrings, false otherwise.
6363
     *
6364
     * - case-sensitive
6365
     *
6366
     * @param string   $str        <p>The input string.</p>
6367
     * @param string[] $substrings <p>Substrings to look for.</p>
6368
     *
6369
     * @psalm-pure
6370
     *
6371
     * @return bool
6372
     *              <p>Whether or not $str ends with $substring.</p>
6373
     */
6374 7
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        // With no candidates the loop body never runs and the result is false.
        foreach ($substrings as $candidate) {
            // Byte-wise, case-sensitive comparison of the trailing part of $str.
            $tail = \substr($str, -\strlen($candidate));
            if ($tail === $candidate) {
                return true;
            }
        }

        return false;
    }
6388
6389
    /**
6390
     * Ensures that the string begins with $substring. If it doesn't, it's
6391
     * prepended.
6392
     *
6393
     * @param string $str       <p>The input string.</p>
6394
     * @param string $substring <p>The substring to add if not present.</p>
6395
     *
6396
     * @psalm-pure
6397
     *
6398
     * @return string
6399
     */
6400 10
    public static function str_ensure_left(string $str, string $substring): string
    {
        $prefix_length = \strlen($substring);

        // A non-empty prefix is "present" when the leading bytes of $str equal it.
        $has_prefix = $prefix_length > 0
                      && \strncmp($str, $substring, $prefix_length) === 0;

        return $has_prefix ? $str : $substring . $str;
    }
6412
6413
    /**
6414
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
6415
     *
6416
     * @param string $str       <p>The input string.</p>
6417
     * @param string $substring <p>The substring to add if not present.</p>
6418
     *
6419
     * @psalm-pure
6420
     *
6421
     * @return string
6422
     */
6423 10
    public static function str_ensure_right(string $str, string $substring): string
    {
        // The suffix counts as present only when both strings are non-empty
        // and the trailing bytes of $str equal $substring.
        $has_suffix = $str !== ''
                      && $substring !== ''
                      && \substr($str, -\strlen($substring)) === $substring;

        return $has_suffix ? $str : $str . $substring;
    }
6437
6438
    /**
6439
     * Capitalizes the first word of the string, replaces underscores with
6440
     * spaces, and strips '_id'.
6441
     *
6442
     * @param string $str
6443
     *
6444
     * @psalm-pure
6445
     *
6446
     * @return string
6447
     */
6448 3
    public static function str_humanize($str): string
    {
        // Replacements are applied in order: every '_id' is removed first,
        // then each remaining underscore becomes a space.
        $search = ['_id', '_'];
        $replace = ['', ' '];

        $humanized = \str_replace($search, $replace, $str);

        return self::ucfirst(\trim($humanized));
    }
6464
6465
    /**
6466
     * alias for "UTF8::str_istarts_with()"
6467
     *
6468
     * @param string $haystack
6469
     * @param string $needle
6470
     *
6471
     * @psalm-pure
6472
     *
6473
     * @return bool
6474
     *
6475
     * @see        UTF8::str_istarts_with()
6476
     * @deprecated <p>please use "UTF8::str_istarts_with()"</p>
6477
     */
6478 1
    public static function str_ibegins(string $haystack, string $needle): bool
    {
        // Deprecated alias: forward unchanged to the canonical method.
        $starts_with = self::str_istarts_with($haystack, $needle);

        return $starts_with;
    }
6482
6483
    /**
6484
     * alias for "UTF8::str_iends_with()"
6485
     *
6486
     * @param string $haystack
6487
     * @param string $needle
6488
     *
6489
     * @psalm-pure
6490
     *
6491
     * @return bool
6492
     *
6493
     * @see        UTF8::str_iends_with()
6494
     * @deprecated <p>please use "UTF8::str_iends_with()"</p>
6495
     */
6496 1
    public static function str_iends(string $haystack, string $needle): bool
    {
        // Deprecated alias: forward unchanged to the canonical method.
        $ends_with = self::str_iends_with($haystack, $needle);

        return $ends_with;
    }
6500
6501
    /**
6502
     * Check if the string ends with the given substring, case-insensitive.
6503
     *
6504
     * EXAMPLE: <code>
6505
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
6506
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'κόσμε'); // true
6507
     * </code>
6508
     *
6509
     * @param string $haystack <p>The string to search in.</p>
6510
     * @param string $needle   <p>The substring to search for.</p>
6511
     *
6512
     * @psalm-pure
6513
     *
6514
     * @return bool
6515
     */
6516 12
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        // Every string ends with the empty string; an empty haystack
        // can only end with the empty string.
        if ($needle === '' || $haystack === '') {
            return $needle === '';
        }

        // Take as many trailing bytes as the needle has and compare them case-insensitively.
        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
6528
6529
    /**
6530
     * Returns true if the string ends with any of $substrings, false otherwise.
6531
     *
6532
     * - case-insensitive
6533
     *
6534
     * @param string   $str        <p>The input string.</p>
6535
     * @param string[] $substrings <p>Substrings to look for.</p>
6536
     *
6537
     * @psalm-pure
6538
     *
6539
     * @return bool
6540
     *              <p>Whether or not $str ends with $substring.</p>
6541
     */
6542 4
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // With no candidates the loop body never runs and the result is false.
        foreach ($substrings as $candidate) {
            if (self::str_iends_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
6556
6557
    /**
6558
     * Returns the index of the first occurrence of $needle in the string,
6559
     * and false if not found. Accepts an optional offset from which to begin
6560
     * the search.
6561
     *
6562
     * @param string $str      <p>The input string.</p>
6563
     * @param string $needle   <p>Substring to look for.</p>
6564
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
6565
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6566
     *
6567
     * @psalm-pure
6568
     *
6569
     * @return false|int
6570
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
6571
     *
6572
     * @see        UTF8::stripos()
6573
     * @deprecated <p>please use "UTF8::stripos()"</p>
6574
     */
6575 1
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Deprecated alias: all arguments are forwarded unchanged to "stripos()".
        $position = self::stripos($str, $needle, $offset, $encoding);

        return $position;
    }
6588
6589
    /**
6590
     * Returns the index of the last occurrence of $needle in the string,
6591
     * and false if not found. Accepts an optional offset from which to begin
6592
     * the search. Offsets may be negative to count from the last character
6593
     * in the string.
6594
     *
6595
     * @param string $str      <p>The input string.</p>
6596
     * @param string $needle   <p>Substring to look for.</p>
6597
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
6598
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6599
     *
6600
     * @psalm-pure
6601
     *
6602
     * @return false|int
6603
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
6604
     *
6605
     * @see        UTF8::strripos()
6606
     * @deprecated <p>please use "UTF8::strripos()"</p>
6607
     */
6608 10
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Deprecated alias: all arguments are forwarded unchanged to "strripos()".
        $position = self::strripos($str, $needle, $offset, $encoding);

        return $position;
    }
6621
6622
    /**
6623
     * Returns the index of the first occurrence of $needle in the string,
6624
     * and false if not found. Accepts an optional offset from which to begin
6625
     * the search.
6626
     *
6627
     * @param string $str      <p>The input string.</p>
6628
     * @param string $needle   <p>Substring to look for.</p>
6629
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
6630
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6631
     *
6632
     * @psalm-pure
6633
     *
6634
     * @return false|int
6635
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
6636
     *
6637
     * @see        UTF8::strpos()
6638
     * @deprecated <p>please use "UTF8::strpos()"</p>
6639
     */
6640 11
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Deprecated alias: all arguments are forwarded unchanged to "strpos()".
        $position = self::strpos($str, $needle, $offset, $encoding);

        return $position;
    }
6653
6654
    /**
6655
     * Returns the index of the last occurrence of $needle in the string,
6656
     * and false if not found. Accepts an optional offset from which to begin
6657
     * the search. Offsets may be negative to count from the last character
6658
     * in the string.
6659
     *
6660
     * @param string $str      <p>The input string.</p>
6661
     * @param string $needle   <p>Substring to look for.</p>
6662
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
6663
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6664
     *
6665
     * @psalm-pure
6666
     *
6667
     * @return false|int
6668
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
6669
     *
6670
     * @see        UTF8::strrpos()
6671
     * @deprecated <p>please use "UTF8::strrpos()"</p>
6672
     */
6673 10
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Deprecated alias: all arguments are forwarded unchanged to "strrpos()".
        $position = self::strrpos($str, $needle, $offset, $encoding);

        return $position;
    }
6686
6687
    /**
6688
     * Inserts $substring into the string at the $index provided.
6689
     *
6690
     * @param string $str       <p>The input string.</p>
6691
     * @param string $substring <p>String to be inserted.</p>
6692
     * @param int    $index     <p>The index at which to insert the substring.</p>
6693
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
6694
     *
6695
     * @psalm-pure
6696
     *
6697
     * @return string
6698
     */
6699 8
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        // UTF-8 path: call the "mb_*" functions directly.
        if ($encoding === 'UTF-8') {
            $length = (int) \mb_strlen($str);

            // An index beyond the end of the string leaves the input untouched.
            if ($index > $length) {
                return $str;
            }

            /** @noinspection UnnecessaryCastingInspection */
            $head = (string) \mb_substr($str, 0, $index);
            /** @noinspection UnnecessaryCastingInspection */
            $tail = (string) \mb_substr($str, $index, $length);

            return $head . $substring . $tail;
        }

        // Any other encoding: normalize its name and go through the class helpers.
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $length = (int) self::strlen($str, $encoding);
        if ($index > $length) {
            return $str;
        }

        $head = (string) self::substr($str, 0, $index, $encoding);
        $tail = (string) self::substr($str, $index, $length, $encoding);

        return $head . $substring . $tail;
    }
6728
6729
    /**
6730
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
6731
     *
6732
     * EXAMPLE: <code>
6733
     * UTF8::str_ireplace('lIzÆ', 'lise', 'Iñtërnâtiônàlizætiøn'); // 'Iñtërnâtiônàlisetiøn'
6734
     * </code>
6735
     *
6736
     * @see http://php.net/manual/en/function.str-ireplace.php
6737
     *
6738
     * @param string|string[] $search      <p>
6739
     *                                     Every replacement with search array is
6740
     *                                     performed on the result of previous replacement.
6741
     *                                     </p>
6742
     * @param string|string[] $replacement <p>The replacement.</p>
6743
     * @param string|string[] $subject     <p>
6744
     *                                     If subject is an array, then the search and
6745
     *                                     replace is performed with every entry of
6746
     *                                     subject, and the return value is an array as
6747
     *                                     well.
6748
     *                                     </p>
6749
     * @param int             $count       [optional] <p>
6750
     *                                     The number of matched and replaced needles will
6751
     *                                     be returned in count which is passed by
6752
     *                                     reference.
6753
     *                                     </p>
6754
     *
6755
     * @psalm-pure
6756
     *
6757
     * @return string|string[]
6758
     *                         <p>A string or an array of replacements.</p>
6759
     *
6760
     * @template TStrIReplaceSubject
6761
     * @phpstan-param TStrIReplaceSubject $subject
6762
     * @phpstan-return TStrIReplaceSubject
6763
     */
6764 29
    public static function str_ireplace($search, $replacement, $subject, &$count = null)
    {
        // Turn every search term into a case-insensitive, UTF-8 aware regex.
        // Order is preserved because "preg_replace()" pairs patterns and replacements by position.
        $patterns = [];
        foreach ((array) $search as $term) {
            $term = (string) $term;
            if ($term === '') {
                // Pattern that can never match: an empty search term replaces nothing.
                $patterns[] = '/^(?<=.)$/';
            } else {
                $patterns[] = '/' . \preg_quote($term, '/') . '/ui';
            }
        }

        // fallback
        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($replacement === null) {
            $replacement = '';
        }

        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($subject === null) {
            $subject = '';
        }

        // "preg_replace()" interprets "$n", "${n}" and "\n" in the replacement as
        // back-references. This method is a "str_replace" analogue, so the replacement
        // must be inserted literally: escape backslashes and dollar signs.
        $escape_replacement = static function ($value): string {
            return \addcslashes((string) $value, '\\$');
        };
        if (\is_array($replacement)) {
            $replacement = \array_map($escape_replacement, $replacement);
        } else {
            $replacement = $escape_replacement($replacement);
        }

        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrIReplaceSubject $subject
         */
        $subject = \preg_replace($patterns, $replacement, $subject, -1, $count);

        return $subject;
    }
6796
6797
    /**
6798
     * Replaces $search from the beginning of string with $replacement.
6799
     *
6800
     * @param string $str         <p>The input string.</p>
6801
     * @param string $search      <p>The string to search for.</p>
6802
     * @param string $replacement <p>The replacement.</p>
6803
     *
6804
     * @psalm-pure
6805
     *
6806
     * @return string
6807
     *                <p>The string after the replacement.</p>
6808
     */
6809 17
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        // With an empty search term the replacement is appended to the input
        // (for an empty input that is just the replacement itself).
        if ($search === '') {
            return $str . $replacement;
        }

        // Byte-wise, ASCII case-insensitive comparison of the leading part.
        // An empty $str never matches a non-empty search term and is returned as-is.
        $prefix_length = \strlen($search);
        $has_prefix = \strncasecmp($str, $search, $prefix_length) === 0;

        return $has_prefix
            ? $replacement . \substr($str, $prefix_length)
            : $str;
    }
6832
6833
    /**
6834
     * Replaces $search from the ending of string with $replacement.
6835
     *
6836
     * @param string $str         <p>The input string.</p>
6837
     * @param string $search      <p>The string to search for.</p>
6838
     * @param string $replacement <p>The replacement.</p>
6839
     *
6840
     * @psalm-pure
6841
     *
6842
     * @return string
6843
     *                <p>The string after the replacement.</p>
6844
     */
6845 17
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        // With an empty search term the replacement is appended to the input
        // (for an empty input that is just the replacement itself).
        if ($search === '') {
            return $str . $replacement;
        }

        $str_length = \strlen($str);
        $search_length = \strlen($search);

        // A search term longer than the input cannot be its ending. Bail out early:
        // otherwise the offset below can fall outside the string, which makes
        // "stripos()" throw a ValueError on PHP 8 (and warn on PHP 7).
        if ($search_length > $str_length) {
            return $str;
        }

        // Searching from "length - search length" means any hit is exactly at the end.
        if (\stripos($str, $search, $str_length - $search_length) !== false) {
            return \substr($str, 0, -$search_length) . $replacement;
        }

        return $str;
    }
6867
6868
    /**
6869
     * Check if the string starts with the given substring, case-insensitive.
6870
     *
6871
     * EXAMPLE: <code>
6872
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
6873
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'κόσμε'); // true
6874
     * </code>
6875
     *
6876
     * @param string $haystack <p>The string to search in.</p>
6877
     * @param string $needle   <p>The substring to search for.</p>
6878
     *
6879
     * @psalm-pure
6880
     *
6881
     * @return bool
6882
     */
6883 13
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        // Every string starts with the empty string; an empty haystack
        // can only start with the empty string.
        if ($needle === '' || $haystack === '') {
            return $needle === '';
        }

        // A case-insensitive hit at position 0 means the haystack starts with the needle.
        $position = self::stripos($haystack, $needle);

        return $position === 0;
    }
6895
6896
    /**
6897
     * Returns true if the string begins with any of $substrings, false otherwise.
6898
     *
6899
     * - case-insensitive
6900
     *
6901
     * @param string $str        <p>The input string.</p>
6902
     * @param array  $substrings <p>Substrings to look for.</p>
6903
     *
6904
     * @psalm-pure
6905
     *
6906
     * @return bool
6907
     *              <p>Whether or not $str starts with $substring.</p>
6908
     */
6909 5
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        // An empty input never matches, not even against an empty candidate.
        if ($str === '') {
            return false;
        }

        // With no candidates the loop body never runs and the result is false.
        foreach ($substrings as $candidate) {
            if (self::str_istarts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
6927
6928
    /**
6929
     * Gets the substring after the first occurrence of a separator.
6930
     *
6931
     * @param string $str       <p>The input string.</p>
6932
     * @param string $separator <p>The string separator.</p>
6933
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6934
     *
6935
     * @psalm-pure
6936
     *
6937
     * @return string
6938
     */
6939 1
    public static function str_isubstr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to cut without a separator or without input.
        if ($separator === '' || $str === '') {
            return '';
        }

        // Locate the separator (case-insensitive) in the SAME encoding that is used
        // for the substring extraction below, so the character offset stays consistent.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // Skip the separator itself and return everything after it.
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6967
6968
    /**
6969
     * Gets the substring after the last occurrence of a separator.
6970
     *
6971
     * @param string $str       <p>The input string.</p>
6972
     * @param string $separator <p>The string separator.</p>
6973
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6974
     *
6975
     * @psalm-pure
6976
     *
6977
     * @return string
6978
     */
6979 1
    public static function str_isubstr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to cut without a separator or without input.
        if ($separator === '' || $str === '') {
            return '';
        }

        // Locate the last occurrence (case-insensitive) in the SAME encoding that is used
        // for the substring extraction below, so the character offset stays consistent.
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // Skip the separator itself and return everything after it.
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
7007
7008
    /**
     * Gets the substring before the first (case-insensitive) occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>
     *                Everything in front of the separator, or an empty string if one of
     *                the inputs is empty or the separator was not found.
     *                </p>
     */
    public static function str_isubstr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search with the same encoding that is used for cutting below, otherwise
        // the offset and the substring would be measured in different encodings.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
7039
7040
    /**
     * Gets the substring before the last (case-insensitive) occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>
     *                Everything in front of the last separator, or an empty string if one
     *                of the inputs is empty or the separator was not found.
     *                </p>
     */
    public static function str_isubstr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // UTF-8 uses the native mbstring call, everything else the class wrapper.
        $position = $is_utf8
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);

        if ($position === false) {
            return '';
        }

        return $is_utf8
            ? (string) \mb_substr($str, 0, $position)
            : (string) self::substr($str, 0, $position, $encoding);
    }
7076
7077
    /**
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
     *
     * This is a thin wrapper around "UTF8::stristr()" that returns an empty string
     * instead of false.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The matching part, or an empty string if an input is empty or nothing was found.</p>
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::stristr($str, $needle, $before_needle, $encoding);

        return $found === false ? '' : $found;
    }
7115
7116
    /**
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
     *
     * This is a thin wrapper around "UTF8::strrichr()" that returns an empty string
     * instead of false.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The matching part, or an empty string if an input is empty or nothing was found.</p>
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::strrichr($str, $needle, $before_needle, $encoding);

        return $found === false ? '' : $found;
    }
7154
7155
    /**
     * Returns the last $n characters of the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The trailing characters, or an empty string if $str is empty or $n is not positive.</p>
     */
    public static function str_last_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        if ($n <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $normalized_encoding);
        }

        // A negative start counts from the end of the string.
        return (string) \mb_substr($str, -$n);
    }
7183
7184
    /**
     * Limit the number of characters in a string.
     *
     * If the string is longer than $length, it is cut and $str_add_on is appended;
     * the length of the add-on is subtracted from the part of $str that is kept.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>
     *                The unchanged string if it already fits, an empty string if $str is
     *                empty or $length is not positive, otherwise the shortened string
     *                followed by $str_add_on.
     *                </p>
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Never pass a negative length: it would cut characters from the END of
            // the string (keeping almost everything) instead of limiting the string.
            $keep = \max(0, $length - (int) self::strlen($str_add_on));

            return ((string) \mb_substr($str, 0, $keep)) . $str_add_on;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // Measure the add-on with the same encoding as the string itself.
        $keep = \max(0, $length - (int) self::strlen($str_add_on, $encoding));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
    }
7223
7224
    /**
     * Limit the number of characters in a string, but also after the next word.
     *
     * EXAMPLE: <code>UTF8::str_limit_after_word('fòô bàř fòô', 8, ''); // 'fòô bàř'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>
     *                The unchanged string if it already fits, an empty string if $str is
     *                empty or $length is not positive, otherwise the string cut at a
     *                space (if there is one) followed by $str_add_on.
     *                </p>
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($length <= 0 || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            /** @noinspection UnnecessaryCastingInspection */
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // The limit falls exactly on a space: cut right in front of it.
            if (\mb_substr($str, $length - 1, 1) === ' ') {
                return ((string) \mb_substr($str, 0, $length - 1)) . $str_add_on;
            }

            $head = \mb_substr($str, 0, $length);

            // Drop the last (possibly incomplete) word.
            $words = \implode(' ', \explode(' ', $head, -1));

            // No space inside the limit: fall back to a hard cut.
            if ($words === '') {
                return ((string) \mb_substr($head, 0, $length - 1)) . $str_add_on;
            }

            return $words . $str_add_on;
        }

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // The limit falls exactly on a space: cut right in front of it.
        if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
            return ((string) self::substr($str, 0, $length - 1, $encoding)) . $str_add_on;
        }

        $head = self::substr($str, 0, $length, $encoding);
        if ($head === false) {
            return '' . $str_add_on;
        }

        // Drop the last (possibly incomplete) word.
        $words = \implode(' ', \explode(' ', $head, -1));

        // No space inside the limit: fall back to a hard cut.
        if ($words === '') {
            return ((string) self::substr($head, 0, $length - 1, $encoding)) . $str_add_on;
        }

        return $words . $str_add_on;
    }
7292
7293
    /**
     * Returns the longest common prefix between the $str1 and $str2.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The shared leading characters, or an empty string if there are none.</p>
     */
    public static function str_longest_common_prefix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        $prefix = '';
        $position = 0;

        if ($encoding === 'UTF-8') {
            // The prefix can never be longer than the shorter string.
            $limit = (int) \min(\mb_strlen($str1), \mb_strlen($str2));

            while ($position < $limit) {
                $char = \mb_substr($str1, $position, 1);

                if ($char === false || $char !== \mb_substr($str2, $position, 1)) {
                    break;
                }

                $prefix .= $char;
                ++$position;
            }

            return $prefix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // The prefix can never be longer than the shorter string.
        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        while ($position < $limit) {
            $char = self::substr($str1, $position, 1, $encoding);

            if ($char === false || $char !== self::substr($str2, $position, 1, $encoding)) {
                break;
            }

            $prefix .= $char;
            ++$position;
        }

        return $prefix;
    }
7356
7357
    /**
     * Returns the longest common substring between the $str1 and $str2.
     * In the case of ties, it returns that which occurs first.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The longest common substring, or an empty string if there is none.</p>
     */
    public static function str_longest_common_substring(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        // Uses dynamic programming to solve
        // http://en.wikipedia.org/wiki/Longest_common_substring_problem

        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str1);
            $other_length = (int) \mb_strlen($str2);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str_length = (int) self::strlen($str1, $encoding);
            $other_length = (int) self::strlen($str2, $encoding);
        }

        // Return if either string is empty
        if ($str_length === 0 || $other_length === 0) {
            return '';
        }

        // Checked after the normalization on purpose: an alias that was normalized
        // to "UTF-8" uses the native mbstring functions from here on.
        $use_mb = $encoding === 'UTF-8';

        // Extract every character only once instead of once per table cell.
        $str_chars = [];
        for ($i = 0; $i < $str_length; ++$i) {
            $str_chars[] = $use_mb
                ? \mb_substr($str1, $i, 1)
                : self::substr($str1, $i, 1, $encoding);
        }

        $other_chars = [];
        for ($j = 0; $j < $other_length; ++$j) {
            $other_chars[] = $use_mb
                ? \mb_substr($str2, $j, 1)
                : self::substr($str2, $j, 1, $encoding);
        }

        // $len = length of the best match so far, $end = index (in $str1) right behind it
        $len = 0;
        $end = 0;

        // Only the previous row of the table is ever read, so two rows are enough.
        $previous_row = \array_fill(0, $other_length + 1, 0);

        for ($i = 1; $i <= $str_length; ++$i) {
            $str_char = $str_chars[$i - 1];
            $current_row = [0];

            for ($j = 1; $j <= $other_length; ++$j) {
                if ($str_char === $other_chars[$j - 1]) {
                    $run = $previous_row[$j - 1] + 1;

                    // strictly greater: in the case of ties the first match wins
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $run = 0;
                }

                $current_row[$j] = $run;
            }

            $previous_row = $current_row;
        }

        if ($use_mb) {
            return (string) \mb_substr($str1, $end - $len, $len);
        }

        return (string) self::substr($str1, $end - $len, $len, $encoding);
    }
7447
7448
    /**
     * Returns the longest common suffix between the $str1 and $str2.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The shared trailing characters, or an empty string if there are none.</p>
     */
    public static function str_longest_common_suffix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        $suffix = '';

        // $distance = 1 is the last character, 2 the one before it, ...
        $distance = 1;

        if ($encoding === 'UTF-8') {
            // The suffix can never be longer than the shorter string.
            $limit = (int) \min(
                \mb_strlen($str1, $encoding),
                \mb_strlen($str2, $encoding)
            );

            while ($distance <= $limit) {
                $char = \mb_substr($str1, -$distance, 1);

                if ($char === false || $char !== \mb_substr($str2, -$distance, 1)) {
                    break;
                }

                $suffix = $char . $suffix;
                ++$distance;
            }

            return $suffix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // The suffix can never be longer than the shorter string.
        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        while ($distance <= $limit) {
            $char = self::substr($str1, -$distance, 1, $encoding);

            if ($char === false || $char !== self::substr($str2, -$distance, 1, $encoding)) {
                break;
            }

            $suffix = $char . $suffix;
            ++$distance;
        }

        return $suffix;
    }
7514
7515
    /**
     * Returns true if $str matches the supplied pattern, false otherwise.
     *
     * The pattern is wrapped in "/" delimiters and evaluated with the "u" modifier,
     * so it must be passed without delimiters.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str matches the pattern.</p>
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        $regex = '/' . $pattern . '/u';

        // preg_match() yields 1 on a match, 0 on no match and false on error.
        return \preg_match($regex, $str) === 1;
    }
7530
7531
    /**
     * Returns whether or not a character exists at an index. Offsets may be
     * negative to count from the last character in the string. Implements
     * part of the ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the index exists.</p>
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $char_count = (int) self::strlen($str, $encoding);

        // Non-negative offsets are zero-based, negative offsets start at -1 (last char).
        return $offset >= 0
            ? $offset < $char_count
            : \abs($offset) <= $char_count;
    }
7556
7557
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string
     *                <p>The character at the specified index.</p>
     *
     * @psalm-pure
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Measure the string with the requested encoding, so that the bounds check
        // agrees with "char_at()" below (and with "str_offset_exists()").
        $length = (int) self::strlen($str, $encoding);

        if (
            ($index >= 0 && $length <= $index)
            ||
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
7589
7590
    /**
     * Pad a UTF-8 string to a given length with another string.
     *
     * EXAMPLE: <code>UTF8::str_pad('中文空白', 10, '_', STR_PAD_BOTH); // '___中文空白___'</code>
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of 'left', 'right' or 'both'
     *
     * @psalm-pure
     *
     * @return string
     *                <p>
     *                Returns the padded string, or the unchanged input if it is already
     *                long enough, $pad_length is 0 or $pad_string is empty.
     *                </p>
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        // translate the string aliases into the native "STR_PAD_*" constants
        if ($pad_type !== (int) $pad_type) {
            if ($pad_type === 'left') {
                $pad_type = \STR_PAD_LEFT;
            } elseif ($pad_type === 'right') {
                $pad_type = \STR_PAD_RIGHT;
            } elseif ($pad_type === 'both') {
                $pad_type = \STR_PAD_BOTH;
            } else {
                throw new \InvalidArgumentException(
                    'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }
        }

        // Decided before the normalization: only a literal 'UTF-8' takes the
        // native mbstring path, every other value goes through the class wrappers.
        $use_mb = $encoding === 'UTF-8';
        if (!$use_mb) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str_length = $use_mb
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        if ($pad_length < $str_length) {
            return $str;
        }

        // number of characters that are missing in total
        $diff = $pad_length - $str_length;

        // at least 1, so that the division below can never fail
        $ps_length = \max(
            1,
            $use_mb
                ? (int) \mb_strlen($pad_string)
                : (int) self::strlen($pad_string, $encoding)
        );

        // Builds a padding of exactly $length characters: repeat the pad string
        // just often enough and cut off the overhang.
        $build_padding = static function (int $length) use ($pad_string, $ps_length, $use_mb, $encoding): string {
            if ($length <= 0) {
                return '';
            }

            $repeated = \str_repeat($pad_string, (int) \ceil($length / $ps_length));

            return $use_mb
                ? (string) \mb_substr($repeated, 0, $length)
                : (string) self::substr($repeated, 0, $length, $encoding);
        };

        switch ($pad_type) {
            case \STR_PAD_LEFT:
                $pre = $build_padding($diff);
                $post = '';

                break;

            case \STR_PAD_BOTH:
                // an odd difference puts the extra character on the right side
                $pre = $build_padding((int) \floor($diff / 2));
                $post = $build_padding((int) \ceil($diff / 2));

                break;

            case \STR_PAD_RIGHT:
            default:
                $pre = '';
                $post = $build_padding($diff);
        }

        return $pre . $str . $post;
    }
7757
7758
    /**
     * Pads both sides of the string until it has the given length.
     * Alias for "UTF8::str_pad()" with a $pad_type of STR_PAD_BOTH.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with padding applied.</p>
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $pad_str, \STR_PAD_BOTH, $encoding);

        return $padded;
    }
7786
7787
    /**
     * Pads the beginning of the string until it has the given length.
     * Alias for "UTF8::str_pad()" with a $pad_type of STR_PAD_LEFT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with left padding.</p>
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $pad_str, \STR_PAD_LEFT, $encoding);

        return $padded;
    }
7815
7816
    /**
     * Pads the end of the string until it has the given length.
     * Alias for "UTF8::str_pad()" with a $pad_type of STR_PAD_RIGHT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with right padding.</p>
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $pad_str, \STR_PAD_RIGHT, $encoding);

        return $padded;
    }
7844
7845
    /**
     * Repeat a string.
     *
     * The input is passed through "UTF8::filter()" before it is repeated.
     *
     * EXAMPLE: <code>UTF8::str_repeat("°~\xf0\x90\x28\xbc", 2); // '°~ð(¼°~ð(¼'</code>
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of times the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           The multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The repeated string.</p>
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
7874
7875
    /**
     * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param string|string[] $search  <p>
     *                                 The value being searched for (the needle).
     *                                 An array may be used to designate multiple needles.
     *                                 </p>
     * @param string|string[] $replace <p>
     *                                 The replacement value that replaces found search
     *                                 values. An array may be used to designate multiple replacements.
     *                                 </p>
     * @param string|string[] $subject <p>
     *                                 The string or array of strings being searched and replaced on
     *                                 (the haystack).
     *                                 </p>
     *                                 <p>
     *                                 If subject is an array, then the search and
     *                                 replace is performed with every entry of
     *                                 subject, and the return value is an array as
     *                                 well.
     *                                 </p>
     * @param int|null        $count   [optional] <p>
     *                                 If passed, this will hold the number of matched and replaced needles.
     *                                 </p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>A string or an array with the replaced values.</p>
     *
     * @template TStrReplaceSubject
     * @phpstan-param TStrReplaceSubject $subject
     * @phpstan-return TStrReplaceSubject
     *
     * @deprecated please use \str_replace() instead
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrReplaceSubject $replaced;
         */
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
7934
7935
    /**
     * Replaces $search with $replacement, but only if $str starts with $search.
     *
     * The comparison is byte-wise and case-sensitive. An empty $search is treated
     * as a match at offset 0, so $replacement is prepended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string after the replacement, or the unchanged input
     *                if it does not start with $search.</p>
     */
    public static function str_replace_beginning(
        string $str,
        string $search,
        string $replacement
    ): string {
        // An empty needle matches at the very beginning: prepend the replacement.
        // (This also covers the empty-input cases: '' + replacement => replacement.)
        if ($search === '') {
            return $replacement . $str;
        }

        $search_length = \strlen($search);
        if (\strncmp($str, $search, $search_length) !== 0) {
            return $str;
        }

        // Cutting by byte length is fine here because exactly the matched prefix is removed.
        // The cast covers PHP < 8, where substr() returns false if nothing is left.
        return $replacement . (string) \substr($str, $search_length);
    }
7973
7974
    /**
     * Replaces $search with $replacement, but only if $str ends with $search.
     *
     * The comparison is byte-wise and case-sensitive. An empty $search is treated
     * as a match at the very end, so $replacement is appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string after the replacement, or the unchanged input
     *                if it does not end with $search.</p>
     */
    public static function str_replace_ending(
        string $str,
        string $search,
        string $replacement
    ): string {
        // An empty needle matches at the very end: append the replacement.
        // (This also covers the empty-input cases: '' + replacement => replacement.)
        if ($search === '') {
            return $str . $replacement;
        }

        $str_length = \strlen($str);
        $search_length = \strlen($search);

        // A needle longer than the haystack can never be a suffix. Checking this first
        // avoids handing an out-of-range negative offset to a native string function
        // (strpos() throws a ValueError for that on PHP 8).
        if ($search_length > $str_length) {
            return $str;
        }

        if (\substr($str, -$search_length) !== $search) {
            return $str;
        }

        return (string) \substr($str, 0, $str_length - $search_length) . $replacement;
    }
8011
8012
    /**
     * Replace the first occurrence of the "$search"-term with the "$replace"-term.
     *
     * If "$search" is not found, "$subject" is returned unchanged.
     *
     * @param string $search  <p>The string to search for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string to search in.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(
        string $search,
        string $replace,
        string $subject
    ): string {
        $position = self::strpos($subject, $search);

        // nothing found -> nothing to replace
        if ($position === false) {
            return $subject;
        }

        $search_length = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $position, $search_length);
    }
8046
8047
    /**
     * Replace the last occurrence of the "$search"-term with the "$replace"-term.
     *
     * If "$search" is not found, "$subject" is returned unchanged.
     *
     * @param string $search  <p>The string to search for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string to search in.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $position = self::strrpos($subject, $search);

        // nothing found -> nothing to replace
        if ($position === false) {
            return $subject;
        }

        $search_length = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $position, $search_length);
    }
8080
8081
    /**
     * Shuffles all the characters in the string.
     *
     * INFO: uses "shuffle()", a random algorithm which is weak for cryptography purposes
     *
     * EXAMPLE: <code>UTF8::str_shuffle('fòô bàř fòô'); // 'àòôřb ffòô '</code>
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shuffled string.</p>
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            // build the list of character positions and bring it into random order
            $positions = \range(0, (int) self::strlen($str, $encoding) - 1);
            /** @noinspection NonSecureShuffleUsageInspection */
            \shuffle($positions);

            $result = '';
            foreach ($positions as $position) {
                $char = self::substr($str, $position, 1, $encoding);
                if ($char !== false) {
                    $result .= $char;
                }
            }

            return $result;
        }

        // build the list of character positions and bring it into random order
        $positions = \range(0, (int) \mb_strlen($str) - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($positions);

        $result = '';
        foreach ($positions as $position) {
            $char = \mb_substr($str, $position, 1);
            if ($char !== false) {
                $result .= $char;
            }
        }

        return $result;
    }
8130
8131
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. Negative values for $start and $end are counted
     * from the end of the string.
     *
     * EXAMPLE: <code>
     * UTF8::str_slice('foobar', 1, -1);  // 'ooba'
     * UTF8::str_slice('foobar', -3, -1); // 'ba'
     * </code>
     *
     * @param string   $str
     * @param int      $start    <p>Initial index from which to begin extraction.</p>
     * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The extracted substring; an empty string if the resolved end index
     *                      is not behind the resolved start index. The result of the underlying
     *                      substr call is passed through, so <b>FALSE</b> is possible if that
     *                      call returns it.</p>
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str_length = $is_utf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        if ($end === null) {
            // "until the end": any length >= the remaining length works
            $length = $str_length;
        } else {
            // Resolve both indexes to absolute positions first, otherwise a negative
            // $start (or an $end far before the beginning) yields a wrong length.
            $start_index = $start < 0 ? \max(0, $str_length + $start) : $start;
            $end_index = $end < 0 ? $str_length + $end : $end;

            if ($end_index <= $start_index) {
                return '';
            }

            $length = $end_index - $start_index;
        }

        if ($is_utf8) {
            return \mb_substr($str, $start, $length);
        }

        return self::substr($str, $start, $length, $encoding);
    }
8182
8183
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Steps: whitespace is normalized and "-" becomes "_"; every number character is
     * surrounded by "_" and every uppercase letter is lowercased and prefixed with "_";
     * finally whitespace runs become "_", repeated "_" are collapsed and leading /
     * trailing "_" are removed.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in snake_case.</p>
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // numbers and uppercase letters mark word boundaries
            // (no "|" inside the character class: there it would be a literal pipe, not an "or")
            '/([\\p{N}\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $match_int = (int) $match;

                // ASCII digit: keep it and separate it on both sides
                if ((string) $match_int === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        // The patterns are applied one after another. Once all whitespace is turned
        // into "_", no separate whitespace-trim pattern is needed anymore.
        $str = (string) \preg_replace(
            [
                '/\\s+/u', // convert spaces to "_"
                '/_+/',    // remove double "_"
            ],
            [
                '_',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
8252
8253
    /**
     * Sort all characters according to code points.
     *
     * EXAMPLE: <code>UTF8::str_sort('  -ABC-中文空白-  '); // '    ---ABC中文白空'</code>
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string of sorted characters.</p>
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $code_points = self::codepoints($str);

        if ($unique) {
            // flipping twice drops duplicated values
            $code_points = \array_flip(\array_flip($code_points));
        }

        if (!$desc) {
            \asort($code_points);
        } else {
            \arsort($code_points);
        }

        return self::string($code_points);
    }
8283
8284
    /**
     * Split every element of an array into chunks of Unicode characters.
     *
     * Each element is handed to "UTF8::str_split()"; the array keys are kept.
     *
     * EXAMPLE: <code>
     * UTF8::str_split_array(['中文空白', 'test'], 2); // [['中文', '空白'], ['te', 'st']]
     * </code>
     *
     * @param int[]|string[] $input                   <p>The string[] or int[] to split into array.</p>
     * @param int            $length                  [optional] <p>Max character length of each array
     *                                                element.</p>
     * @param bool           $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                                string.</p>
     * @param bool           $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                                "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[][]
     *                    <p>An array containing chunks of the input.</p>
     */
    public static function str_split_array(
        array $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        foreach ($input as $key => $value) {
            $input[$key] = self::str_split($value, $length, $clean_utf8, $try_to_use_mb_functions);
        }

        /** @var string[][] $input */
        return $input;
    }
8322
8323
    /**
     * Convert a string to an array of unicode characters.
     *
     * Strategy, in this order: native "mb_str_split()" (if mbstring is usable and the
     * function exists), "mb_substr()" / PCRE based splitting, PCRE only, and finally a
     * byte-wise UTF-8 decoder.
     *
     * EXAMPLE: <code>UTF8::str_split('中文空白'); // array('中', '文', '空', '白')</code>
     *
     * @param int|string $input                   <p>The string or int to split into array.</p>
     * @param int        $length                  [optional] <p>Max character length of each array
     *                                            element.</p>
     * @param bool       $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                            string.</p>
     * @param bool       $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                            "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array containing chunks of chars from the input.</p>
     *
     * @noinspection SuspiciousBinaryOperationInspection
     * @noinspection OffsetOperationsInspection
     */
    public static function str_split(
        $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        // this is only an old fallback
        /** @noinspection PhpSillyAssignmentInspection - hack for phpstan */
        /** @var int|int[]|string|string[] $input */
        $input = $input;
        if (\is_array($input)) {
            /** @psalm-suppress InvalidReturnStatement */
            /** @phpstan-ignore-next-line - old code :/ */
            return self::str_split_array($input, $length, $clean_utf8, $try_to_use_mb_functions);
        }

        // init
        $text = (string) $input;

        if ($text === '') {
            return [];
        }

        if ($clean_utf8) {
            $text = self::clean($text);
        }

        if ($try_to_use_mb_functions && self::$SUPPORT['mbstring'] === true) {
            if (\function_exists('mb_str_split')) {
                /**
                 * @psalm-suppress ImpureFunctionCall - why?
                 */
                $native_chunks = \mb_str_split($text, $length);
                if ($native_chunks !== false) {
                    return $native_chunks;
                }
            }

            $char_count = \mb_strlen($text);
            if ($char_count > 127) {
                // longer input: one regex call instead of one "mb_substr()" call per character
                $matches = [];
                \preg_match_all('/./us', $text, $matches);
                $chars = $matches[0] ?? [];
            } else {
                $chars = [];
                for ($pos = 0; $pos < $char_count; ++$pos) {
                    $chars[] = \mb_substr($text, $pos, 1);
                }
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $matches = [];
            \preg_match_all('/./us', $text, $matches);
            $chars = $matches[0] ?? [];
        } else {
            // fallback: decode the UTF-8 byte sequences by hand

            $chars = [];
            $byte_count = \strlen($text);

            /** @noinspection ForeachInvariantsInspection */
            for ($pos = 0; $pos < $byte_count; ++$pos) {
                $lead = $text[$pos];

                if (($lead & "\x80") === "\x00") {
                    // 0xxxxxxx -> single byte
                    $chars[] = $lead;
                } elseif (
                    isset($text[$pos + 1])
                    &&
                    ($lead & "\xE0") === "\xC0"
                ) {
                    // 110xxxxx -> two byte sequence, needs one continuation byte
                    if (($text[$pos + 1] & "\xC0") === "\x80") {
                        $chars[] = $lead . $text[$pos + 1];

                        ++$pos;
                    }
                } elseif (
                    isset($text[$pos + 2])
                    &&
                    ($lead & "\xF0") === "\xE0"
                ) {
                    // 1110xxxx -> three byte sequence, needs two continuation bytes
                    if (
                        ($text[$pos + 1] & "\xC0") === "\x80"
                        &&
                        ($text[$pos + 2] & "\xC0") === "\x80"
                    ) {
                        $chars[] = $lead . $text[$pos + 1] . $text[$pos + 2];

                        $pos += 2;
                    }
                } elseif (
                    isset($text[$pos + 3])
                    &&
                    ($lead & "\xF8") === "\xF0"
                ) {
                    // 11110xxx -> four byte sequence, needs three continuation bytes
                    if (
                        ($text[$pos + 1] & "\xC0") === "\x80"
                        &&
                        ($text[$pos + 2] & "\xC0") === "\x80"
                        &&
                        ($text[$pos + 3] & "\xC0") === "\x80"
                    ) {
                        $chars[] = $lead . $text[$pos + 1] . $text[$pos + 2] . $text[$pos + 3];

                        $pos += 3;
                    }
                }
                // anything else is skipped
            }
        }

        if ($length > 1) {
            // glue the single characters together into chunks of "$length" characters
            return \array_map(
                static function (array $chunk): string {
                    return \implode('', $chunk);
                },
                \array_chunk($chars, $length)
            );
        }

        if (isset($chars[0]) && $chars[0] === '') {
            return [];
        }

        return $chars;
    }
8480
8481
    /**
     * Splits the string with the provided regular expression, returning an
     * array of strings. An optional integer $limit will truncate the
     * results.
     *
     * NOTE(review): with mbstring the pattern is handed to "mb_split()" as a regular
     * expression, while the fallback without mbstring runs it through "preg_quote()"
     * and therefore matches it literally.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of strings.</p>
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $parts = \mb_split($pattern, $str);
            if ($parts === false) {
                return [];
            }

            // a positive limit keeps only the first "$limit" parts, the rest is dropped
            return $limit >= 0 ? \array_slice($parts, 0, $limit) : $parts;
        }

        // "preg_split()" puts the unsplit rest into the last element, so ask for
        // one element more and throw that one away afterwards
        $preg_limit = $limit > 0 ? $limit + 1 : -1;

        $parts = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $preg_limit);
        if ($parts === false) {
            return [];
        }

        if ($preg_limit > 0 && \count($parts) === $preg_limit) {
            \array_pop($parts);
        }

        return $parts;
    }
8552
8553
    /**
     * Check if the string starts with the given substring.
     *
     * The check is case-sensitive. An empty needle always matches.
     *
     * EXAMPLE: <code>
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'κόσμε'); // false
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // before PHP 8 there is no native "str_starts_with()"
        if (\PHP_VERSION_ID < 80000) {
            return \strncmp($haystack, $needle, \strlen($needle)) === 0;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_starts_with($haystack, $needle);
    }
8585
8586
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty input string or an empty list of substrings never matches
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str starts with one of the $substrings.</p>
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_starts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
8617
8618
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * An empty string is returned if the input or the separator is empty,
     * or if the separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // Go through the class wrapper (like the other "str_substr_*_separator" methods)
        // instead of handing the raw encoding name directly to "mb_substr()".
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
8659
8660
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * An empty string is returned if the input or the separator is empty,
     * or if the separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        $offset = $is_utf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($is_utf8) {
            // skip the separator itself
            return (string) \mb_substr($str, $offset + (int) \mb_strlen($separator));
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
8704
8705
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * An empty string is returned if the input or the separator is empty,
     * or if the separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        $offset = $is_utf8
            ? \mb_strpos($str, $separator)
            : self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // everything from the start up to (not including) the separator
        if ($is_utf8) {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
8750
8751
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * An empty string is returned if the input or the separator is empty,
     * or if the separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        $offset = $is_utf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // everything from the start up to (not including) the last separator
        if ($is_utf8) {
            return (string) \mb_substr($str, 0, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr($str, 0, $offset, $encoding);
    }
8795
8796
    /**
     * Gets the part of the string that starts at the first occurrence of the "$needle"
     * (the needle itself is included), or - via "$before_needle" - the part in front of it.
     *
     * An empty string is returned when the input or the needle is empty,
     * or when the needle was not found.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // "false" is also the default of mb_strstr(), so one call serves both modes.
            $result = \mb_strstr($str, $needle, $before_needle);
        } else {
            $result = self::strstr($str, $needle, $before_needle, $encoding);
        }

        if ($result === false) {
            return '';
        }

        return $result;
    }
8842
8843
    /**
     * Gets the part of the string that starts at the last occurrence of the "$needle"
     * (the needle itself is included), or - via "$before_needle" - the part in front of it.
     *
     * An empty string is returned when the input or the needle is empty,
     * or when the needle was not found.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // "false" is also the default of mb_strrchr(), so one call serves both modes.
            $result = \mb_strrchr($str, $needle, $before_needle);
        } else {
            $result = self::strrchr($str, $needle, $before_needle, $encoding);
        }

        if ($result === false) {
            return '';
        }

        return $result;
    }
8889
8890
    /**
     * Wraps $str in the given substring: the substring is put in front of
     * the input and once more behind it.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with the substring both prepended and appended.</p>
     */
    public static function str_surround(string $str, string $substring): string
    {
        return \implode('', [$substring, $str, $substring]);
    }
8905
8906
    /**
     * Returns a trimmed string with the first letter of each word capitalized.
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * A "word" is every run of characters that is neither whitespace nor one of
     * the chars given via $word_define_chars. Every word that is not listed in
     * $ignore gets its first char upper-cased and the remaining chars lower-cased.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                        [optional] <p>An array of words not to capitalize or
     *                                                           null. Default: null</p>
     * @param string              $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the
     *                                                           string.</p>
     * @param string|null         $lang                          [optional] <p>Set the language for special cases: az,
     *                                                           el, lt, tr</p>
     * @param bool                $try_to_keep_the_string_length [optional] <p>true === try to keep the string length:
     *                                                           e.g. ẞ -> ß</p>
     * @param bool                $use_trim_first                [optional] <p>true === trim the input string,
     *                                                           first</p>
     * @param string|null         $word_define_chars             [optional] <p>A string of chars that are treated like
     *                                                           whitespace, i.e. as separators between words.
     *                                                           Every non-empty string is honored, including
     *                                                           "0".</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     *
     * @noinspection PhpTooManyParametersInspection
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false,
        bool $use_trim_first = true,
        string $word_define_chars = null
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($use_trim_first) {
            $str = \trim($str);
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // The plain "mb_*" functions are only good enough when no language specific
        // rules and no length-preserving case mapping were requested.
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        // Compare explicitly instead of relying on truthiness: the string "0" is falsy
        // in PHP and would otherwise be dropped although it is a valid separator char.
        if ($word_define_chars !== null && $word_define_chars !== '') {
            // the chars end up inside a character class, so they must be escaped
            $word_define_chars = \preg_quote($word_define_chars, '/');
        } else {
            $word_define_chars = '';
        }

        $str = (string) \preg_replace_callback(
            '/([^\\s' . $word_define_chars . ']+)/u',
            static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
                // words from the ignore list are passed through untouched
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                    return $match[0];
                }

                if ($use_mb_functions) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                               . \mb_strtolower(\mb_substr($match[0], 1));
                    }

                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
                }

                // language-aware path: lower-case the whole word, then upper-case its first char
                return self::ucfirst(
                    self::strtolower(
                        $match[0],
                        $encoding,
                        false,
                        $lang,
                        $try_to_keep_the_string_length
                    ),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            },
            $str
        );

        return $str;
    }
9003
9004
    /**
     * Convert a string into an obfuscated string.
     *
     * A randomly chosen share of the chars is replaced with $obfuscateChar. The
     * positions are picked via "random_int()", so the result differs from call to
     * call. Chars listed in $keepChars are never replaced, but they still use up
     * the quota when they are picked.
     *
     * EXAMPLE: <code>
     *
     * UTF8::str_obfuscate('user@example.org', 0.5, '*', ['@', '.']); // e.g. "u**r@e*am*le.*r*"
     * </code>
     *
     * @param string   $str
     * @param float    $percent       <p>Share of the chars to process, 0.0 - 1.0. Values outside of this
     *                                range are clamped.</p>
     * @param string   $obfuscateChar
     * @param string[] $keepChars
     *
     * @return string
     *                <p>The obfuscated string.</p>
     */
    public static function str_obfuscate(
        string $str,
        float $percent = 0.5,
        string $obfuscateChar = '*',
        array $keepChars = []
    ): string {
        // Park the occurrences of the obfuscate char that are already part of the input
        // behind a placeholder; they are restored at the very end.
        $obfuscateCharHelper = "\u{2603}";
        $str = \str_replace($obfuscateChar, $obfuscateCharHelper, $str);

        $chars = self::chars($str);
        $charsMax = \count($chars);

        // Never ask for more chars than the string has (or for less than zero):
        // once every char is marked as done the counter cannot grow anymore, so a
        // quota above $charsMax would keep the loop below spinning forever.
        $charsMaxChange = \max(0.0, \min((float) $charsMax, \round($charsMax * $percent)));

        $charsCounter = 0;
        $charKeyDone = [];

        // Sweep over the chars again and again until the quota is reached.
        while ($charsCounter < $charsMaxChange) {
            foreach ($chars as $charKey => $char) {
                if (isset($charKeyDone[$charKey])) {
                    continue;
                }

                // coin flip: roughly every second remaining char is skipped in this sweep
                if (\random_int(0, 100) > 50) {
                    continue;
                }

                // defensive guard: do not count a char that already is the obfuscate char
                if ($char === $obfuscateChar) {
                    continue;
                }

                ++$charsCounter;
                $charKeyDone[$charKey] = true;

                if ($charsCounter > $charsMaxChange) {
                    break;
                }

                // kept chars count against the quota, but stay readable
                if (\in_array($char, $keepChars, true)) {
                    continue;
                }

                $chars[$charKey] = $obfuscateChar;
            }
        }

        $str = \implode('', $chars);

        return \str_replace($obfuscateCharHelper, $obfuscateChar, $str);
    }
9070
9071
    /**
     * Returns a trimmed string in proper title case.
     *
     * Small words (a, an, and, of, the, ...) stay lower-case unless they start
     * or end the title, a subsentence or an inserted subphrase, or are part of a
     * two-part hyphenated compound. URLs, domains, e-mail addresses, file paths and
     * words with internal capitals (e.g. "iPhone") are left as they are.
     *
     * The entries of $ignore are appended to the list of small words.
     * NOTE: they are inserted into the regular expressions as they are.
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize_for_humans(
        string $str,
        array $ignore = [],
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // built-in small words, followed by the caller supplied ones
        $minor_words = \array_merge(
            [
                '(?<!q&)a',
                'an',
                'and',
                'as',
                'at(?!&t)',
                'but',
                'by',
                'en',
                'for',
                'if',
                'in',
                'of',
                'on',
                'or',
                'the',
                'to',
                'v[.]?',
                'via',
                'vs[.]?',
            ],
            $ignore
        );

        $minor_words_rx = \implode('|', $minor_words);
        $apostrophe_suffix_rx = '(?x: [\'’] [[:lower:]]* )?';

        $str = \trim($str);

        // an input without any lower-case char (e.g. "ALL CAPS") is lower-cased first
        if (!self::has_lowercase($str)) {
            $str = self::strtolower($str, $encoding);
        }

        // step 1: the main substitutions
        /** @noinspection RegExpDuplicateAlternationBranch - false-positive - https://youtrack.jetbrains.com/issue/WI-51002 */
        $str = (string) \preg_replace_callback(
            '~\\b (_*) (?:                                                           # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |                # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_suffix_rx . ' )  #    URL, domain, or email
                        |
                        ( (?i: ' . $minor_words_rx . ' ) ' . $apostrophe_suffix_rx . ' )           # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostrophe_suffix_rx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostrophe_suffix_rx . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                if ($matches[2]) {
                    // URLs, domains, emails and file paths stay as they are
                    $word = $matches[2];
                } elseif ($matches[3]) {
                    // small words are lower-cased
                    $word = self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // a word without internal caps gets its first char capitalized
                    $word = static::ucfirst($matches[4], $encoding);
                } else {
                    // every other kind of word (iPhone) stays as it is
                    $word = $matches[5];
                }

                // leading and trailing underscores are put back around the word
                return $matches[1] . $word . $matches[6];
            },
            $str
        );

        // step 2: exceptions for small words - capitalize at the start of the title...
        $str = (string) \preg_replace_callback(
            '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $minor_words_rx . ' ) \\b # ...followed by small word
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $prefix = $matches[1];

                return $prefix . static::ucfirst($matches[2], $encoding);
            },
            $str
        );

        // step 3: ...and at the end of the title
        $str = (string) \preg_replace_callback(
            '~\\b ( ' . $minor_words_rx . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $small_word = $matches[1];

                return static::ucfirst($small_word, $encoding);
            },
            $str
        );

        // step 4: exceptions for small words in hyphenated compound words,
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $minor_words_rx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $small_word = $matches[1];

                return static::ucfirst($small_word, $encoding);
            },
            $str
        );

        // step 5: e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        $str = (string) \preg_replace_callback(
            '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $minor_words_rx . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $first_word_and_hyphen = $matches[1];

                return $first_word_and_hyphen . static::ucfirst($matches[2], $encoding);
            },
            $str
        );

        return $str;
    }
9262
9263
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of the string are written as one base-2 number: leading zero bits
     * are not part of the result, and an input without any set bit (e.g. the empty
     * string) gives "0".
     *
     * EXAMPLE: <code>UTF8::str_to_binary('😃'); // '11110000100111111001100010000011'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>false on error</p>
     */
    public static function str_to_binary(string $str)
    {
        /** @var array|false $value - needed for PhpStan (stubs error) */
        $value = \unpack('H*', $str);
        if ($value === false) {
            return false;
        }

        // Expand every hex digit into its four bits. "base_convert()" must not be used
        // for this: it falls back to float arithmetic for large values and therefore
        // silently corrupts the low bits of every input that is longer than a few bytes.
        /** @noinspection OffsetOperationsInspection */
        $bits = \strtr(
            (string) $value[1],
            [
                '0' => '0000',
                '1' => '0001',
                '2' => '0010',
                '3' => '0011',
                '4' => '0100',
                '5' => '0101',
                '6' => '0110',
                '7' => '0111',
                '8' => '1000',
                '9' => '1001',
                'a' => '1010',
                'b' => '1011',
                'c' => '1100',
                'd' => '1101',
                'e' => '1110',
                'f' => '1111',
            ]
        );

        // same output format as before: a plain number without leading zeros
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
9286
9287
    /**
     * Split a string into its lines.
     *
     * "\r\n", "\r" and "\n" are each treated as exactly one line break, so empty
     * lines survive as empty strings unless $remove_empty_values is set.
     *
     * @param string   $str
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
    {
        if ($str === '') {
            return $remove_empty_values ? [] : [''];
        }

        // "\r\n" has to be the first alternative, otherwise it would be read as two
        // breaks. A quantified class like "[\r\n]{1,2}" must not be used here: it also
        // swallows "\n\n" as a single break and thereby loses the empty line in between.
        $line_break_rx = "\r\n|\r|\n";

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $return = \mb_split($line_break_rx, $str);
        } else {
            $return = \preg_split('/' . $line_break_rx . '/u', $str);
        }

        if ($return === false) {
            return $remove_empty_values ? [] : [''];
        }

        // nothing to filter: hand back the raw split result
        if (
            $remove_short_values === null
            &&
            !$remove_empty_values
        ) {
            return $return;
        }

        return self::reduce_string_array(
            $return,
            $remove_empty_values,
            $remove_short_values
        );
    }
9327
9328
    /**
     * Convert a string into an array of words.
     *
     * The string is split with the words themselves as captured delimiters, so the
     * result alternates between the text around the words and the words.
     *
     * EXAMPLE: <code>UTF8::str_to_words('中文空白 oöäü#s', '#') // array('', '中文空白', ' ', 'oöäü#s', '')</code>
     *
     * @param string   $str
     * @param string   $char_list           <p>Additional chars for the definition of "words".</p>
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $char_list = '',
        bool $remove_empty_values = false,
        int $remove_short_values = null
    ): array {
        // result for "nothing to split" and for a failing regex
        $fallback = $remove_empty_values ? [] : [''];

        if ($str === '') {
            return $fallback;
        }

        // character class for "word chars": letters plus the caller supplied chars
        $word_class = self::rxClass($char_list, '\pL');

        $parts = \preg_split(
            "/({$word_class}+(?:[\p{Pd}’']{$word_class}+)*)/u",
            $str,
            -1,
            \PREG_SPLIT_DELIM_CAPTURE
        );
        if ($parts === false) {
            return $fallback;
        }

        // no filter requested: hand back the raw split result
        if (!$remove_empty_values && $remove_short_values === null) {
            return $parts;
        }

        $filtered = self::reduce_string_array(
            $parts,
            $remove_empty_values,
            $remove_short_values
        );

        // make sure that every remaining entry really is a string
        return \array_map(
            static function ($part): string {
                return (string) $part;
            },
            $filtered
        );
    }
9379
9380
    /**
     * alias for "UTF8::to_ascii()"
     *
     * All arguments are handed over to "UTF8::to_ascii()" unchanged and its
     * result is returned as it is.
     *
     * @param string $str
     * @param string $unknown
     * @param bool   $strict
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::to_ascii()
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
     */
    public static function str_transliterate(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
9401
9402
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * A string that is not longer than $length is returned unchanged and without
     * the substring. If the substring alone is already as long as (or longer than)
     * $length, nothing of the input is kept and only the substring is returned.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                // Reserve room for the substring, but never go below zero: a negative
                // length would tell "mb_substr()" to only drop chars from the end, and
                // the result would be much longer than requested.
                $length = \max(0, $length - (int) \mb_strlen($substring));

                /** @noinspection UnnecessaryCastingInspection */
                return (string) \mb_substr($str, 0, $length) . $substring;
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            // same clamping as in the UTF-8 path above
            $length = \max(0, $length - (int) self::strlen($substring, $encoding));
        }

        return (
               (string) self::substr(
                   $str,
                   0,
                   $length,
                   $encoding
               )
               ) . $substring;
    }
9462
9463
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * Only the substring is returned for an empty input, for a length that is not
     * positive, and when the substring leaves no room for any char of the input.
     * A string that is not longer than $length is returned unchanged.
     *
     * @param string $str
     * @param int    $length                                 <p>Desired length of the truncated string.</p>
     * @param string $substring                              [optional] <p>The substring to append if it can fit.
     *                                                       Default:
     *                                                       ''</p>
     * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>true === a first word that is longer
     *                                                       than the limit is cut instead of dropped.
     *                                                       Default: false</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignore_do_not_split_words_for_one_word = false
    ): string {
        if ($length <= 0 || $str === '') {
            return $substring;
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // reserve room for the substring
            $length -= (int) \mb_strlen($substring);
            if ($length <= 0) {
                return $substring;
            }

            /** @var false|string $head - needed for PhpStan (stubs error) */
            $head = \mb_substr($str, 0, $length);
            if ($head === false) {
                return '';
            }

            // A space directly behind the cut means that the cut did not hit a word.
            $next_space = \mb_strpos($str, ' ', $length - 1);
            if ($next_space !== $length) {
                // otherwise fall back to the last space in front of the cut
                $last_space = \mb_strrpos($head, ' ', 0);

                $cut_at_last_space = $last_space !== false
                                     || ($next_space !== false && !$ignore_do_not_split_words_for_one_word);

                if ($cut_at_last_space) {
                    // "false" becomes 0 here, i.e. a single, too long word is dropped completely
                    $head = (string) \mb_substr($head, 0, (int) $last_space);
                }
            }

            return $head . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // reserve room for the substring
        $length -= (int) self::strlen($substring, $encoding);
        if ($length <= 0) {
            return $substring;
        }

        $head = self::substr($str, 0, $length, $encoding);
        if ($head === false) {
            return '';
        }

        // A space directly behind the cut means that the cut did not hit a word.
        $next_space = self::strpos($str, ' ', $length - 1, $encoding);
        if ($next_space !== $length) {
            // otherwise fall back to the last space in front of the cut
            $last_space = self::strrpos($head, ' ', 0, $encoding);

            $cut_at_last_space = $last_space !== false
                                 || ($next_space !== false && !$ignore_do_not_split_words_for_one_word);

            if ($cut_at_last_space) {
                // "false" becomes 0 here, i.e. a single, too long word is dropped completely
                $head = (string) self::substr($head, 0, (int) $last_space, $encoding);
            }
        }

        return $head . $substring;
    }
9569
9570
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * This is "UTF8::str_delimit()" with "_" as the delimiter.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The underscored string.</p>
     */
    public static function str_underscored(string $str): string
    {
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
9587
9588
    /**
9589
     * Returns an UpperCamelCase version of the supplied string. It trims
9590
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
9591
     * and underscores, and removes spaces, dashes, underscores.
9592
     *
9593
     * @param string      $str                           <p>The input string.</p>
9594
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
9595
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
9596
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
9597
     *                                                   tr</p>
9598
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
9599
     *                                                   -> ß</p>
9600
     *
9601
     * @psalm-pure
9602
     *
9603
     * @return string
9604
     *                <p>A string in UpperCamelCase.</p>
9605
     */
9606 13
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // step 1: camelize (only $str and $encoding are forwarded to this step)
        $camelized = self::str_camelize($str, $encoding);

        // step 2: upper-case the first character, passing on all remaining options
        return self::ucfirst(
            $camelized,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }
9615
9616
    /**
9617
     * alias for "UTF8::ucfirst()"
9618
     *
9619
     * @param string      $str
9620
     * @param string      $encoding
9621
     * @param bool        $clean_utf8
9622
     * @param string|null $lang
9623
     * @param bool        $try_to_keep_the_string_length
9624
     *
9625
     * @psalm-pure
9626
     *
9627
     * @return string
9628
     *
9629
     * @see        UTF8::ucfirst()
9630
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
9631
     */
9632 5
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // deprecated alias: every argument is handed to ucfirst() unchanged
        return self::ucfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
    }
9647
9648
    /**
9649
     * Get the number of words in a specific string.
9650
     *
9651
     * EXAMPLES: <code>
9652
     * // format: 0 -> return only word count (int)
9653
     * //
9654
     * UTF8::str_word_count('中文空白 öäü abc#c'); // 4
9655
     * UTF8::str_word_count('中文空白 öäü abc#c', 0, '#'); // 3
9656
     *
9657
     * // format: 1 -> return words (array)
9658
     * //
9659
     * UTF8::str_word_count('中文空白 öäü abc#c', 1); // array('中文空白', 'öäü', 'abc', 'c')
9660
     * UTF8::str_word_count('中文空白 öäü abc#c', 1, '#'); // array('中文空白', 'öäü', 'abc#c')
9661
     *
9662
     * // format: 2 -> return words with offset (array)
9663
     * //
9664
     * UTF8::str_word_count('中文空白 öäü abc#c', 2); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc', 13 => 'c')
9665
     * UTF8::str_word_count('中文空白 öäü abc#c', 2, '#'); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc#c')
9666
     * </code>
9667
     *
9668
     * @param string $str       <p>The input string.</p>
9669
     * @param int    $format    [optional] <p>
9670
     *                          <strong>0</strong> => return a number of words (default)<br>
9671
     *                          <strong>1</strong> => return an array of words<br>
9672
     *                          <strong>2</strong> => return an array of words with word-offset as key
9673
     *                          </p>
9674
     * @param string $char_list [optional] <p>Additional chars that belong to words and do not start a new word.</p>
9675
     *
9676
     * @psalm-pure
9677
     *
9678
     * @return int|string[]
9679
     *                      <p>The number of words in the string.</p>
9680
     */
9681 2
    public static function str_word_count(string $str, int $format = 0, string $char_list = '')
    {
        // NOTE(review): the loops below treat every odd index as a word, so str_to_words()
        // presumably returns alternating "separator, word, separator, ..." chunks - confirm.
        $chunks = self::str_to_words($str, $char_list);
        $chunk_count = \count($chunks);

        // format 1: plain list of words
        if ($format === 1) {
            $words = [];
            for ($idx = 1; $idx < $chunk_count; $idx += 2) {
                $words[] = $chunks[$idx];
            }

            return $words;
        }

        // format 2: words keyed by their character offset in the input
        if ($format === 2) {
            $words_by_offset = [];
            // the first chunk precedes the first word, so its length is the first offset
            $position = (int) self::strlen($chunks[0]);
            for ($idx = 1; $idx < $chunk_count; $idx += 2) {
                $words_by_offset[$position] = $chunks[$idx];
                // advance past the word itself and the chunk that follows it
                $position += (int) self::strlen($chunks[$idx]) + (int) self::strlen($chunks[$idx + 1]);
            }

            return $words_by_offset;
        }

        // any other format: only the number of words
        return (int) (($chunk_count - 1) / 2);
    }
9705
9706
    /**
9707
     * Case-insensitive string comparison.
9708
     *
9709
     * INFO: Case-insensitive version of UTF8::strcmp()
9710
     *
9711
     * EXAMPLE: <code>UTF8::strcasecmp("iñtërnâtiôn\nàlizætiøn", "Iñtërnâtiôn\nàlizætiøn"); // 0</code>
9712
     *
9713
     * @param string $str1     <p>The first string.</p>
9714
     * @param string $str2     <p>The second string.</p>
9715
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9716
     *
9717
     * @psalm-pure
9718
     *
9719
     * @return int
9720
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
9721
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
9722
     *             <strong>0</strong> if they are equal
9723
     */
9724 23
    public static function strcasecmp(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): int {
        // fold both operands with identical options before the case-sensitive comparison
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
9748
9749
    /**
9750
     * alias for "UTF8::strstr()"
9751
     *
9752
     * @param string $haystack
9753
     * @param string $needle
9754
     * @param bool   $before_needle
9755
     * @param string $encoding
9756
     * @param bool   $clean_utf8
9757
     *
9758
     * @psalm-pure
9759
     *
9760
     * @return false|string
9761
     *
9762
     * @see        UTF8::strstr()
9763
     * @deprecated <p>please use "UTF8::strstr()"</p>
9764
     */
9765 2
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // deprecated alias: every argument is handed to strstr() unchanged
        return self::strstr($haystack, $needle, $before_needle, $encoding, $clean_utf8);
    }
9780
9781
    /**
9782
     * Case-sensitive string comparison.
9783
     *
9784
     * EXAMPLE: <code>UTF8::strcmp("iñtërnâtiôn\nàlizætiøn", "iñtërnâtiôn\nàlizætiøn"); // 0</code>
9785
     *
9786
     * @param string $str1 <p>The first string.</p>
9787
     * @param string $str2 <p>The second string.</p>
9788
     *
9789
     * @psalm-pure
9790
     *
9791
     * @return int
9792
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
9793
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
9794
     *             <strong>0</strong> if they are equal
9795
     */
9796 29
    public static function strcmp(string $str1, string $str2): int
    {
        // identical byte sequences are equal, no normalization needed
        if ($str1 === $str2) {
            return 0;
        }

        // Compare the canonically decomposed (NFD) forms, so that e.g. a precomposed
        // character and its "base + combining mark" spelling compare as equal.
        //
        // Normalizer::normalize() returns false when it cannot process the input
        // (e.g. invalid UTF-8). In that case fall back to the raw string, so that
        // \strcmp() always receives strings and never a bool.
        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        if ($normalized1 === false) {
            $normalized1 = $str1;
        }

        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);
        if ($normalized2 === false) {
            $normalized2 = $str2;
        }

        return \strcmp($normalized1, $normalized2);
    }
9809
9810
    /**
9811
     * Find length of initial segment not matching mask.
9812
     *
9813
     * @param string   $str
9814
     * @param string   $char_list
9815
     * @param int      $offset
9816
     * @param int|null $length
9817
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
9818
     *
9819
     * @psalm-pure
9820
     *
9821
     * @return int
9822
     */
9823 12
    public static function strcspn(
        string $str,
        string $char_list,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // without a mask nothing can match, so the whole string is the segment
        if ($char_list === '') {
            return (int) self::strlen($str, $encoding);
        }

        // restrict the search to the requested window first
        if ($offset || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $window = self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $window = \mb_substr($str, $offset, $length);
            } else {
                $window = \mb_substr($str, $offset);
            }

            if ($window === false) {
                return 0;
            }

            $str = $window;
        }

        if ($str === '') {
            return 0;
        }

        // lazily capture everything in front of the first character from the mask
        $hit = [];
        if (!\preg_match('/^(.*?)' . self::rxClass($char_list) . '/us', $str, $hit)) {
            // no character of the mask occurs: the segment is the whole (windowed) string
            return (int) self::strlen($str, $encoding);
        }

        $segment_length = self::strlen($hit[1], $encoding);

        return $segment_length === false ? 0 : $segment_length;
    }
9873
9874
    /**
9875
     * alias for "UTF8::stristr()"
9876
     *
9877
     * @param string $haystack
9878
     * @param string $needle
9879
     * @param bool   $before_needle
9880
     * @param string $encoding
9881
     * @param bool   $clean_utf8
9882
     *
9883
     * @psalm-pure
9884
     *
9885
     * @return false|string
9886
     *
9887
     * @see        UTF8::stristr()
9888
     * @deprecated <p>please use "UTF8::stristr()"</p>
9889
     */
9890 1
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // deprecated alias: every argument is handed to stristr() unchanged
        return self::stristr($haystack, $needle, $before_needle, $encoding, $clean_utf8);
    }
9905
9906
    /**
9907
     * Create a UTF-8 string from code points.
9908
     *
9909
     * INFO: opposite to UTF8::codepoints()
9910
     *
9911
     * EXAMPLE: <code>UTF8::string(array(246, 228, 252)); // 'öäü'</code>
9912
     *
9913
     * @param int|int[]|string|string[] $intOrHex <p>Integer or Hexadecimal codepoints.</p>
9914
     *
9915
     * @phpstan-param int[]|numeric-string[]|int|numeric-string $intOrHex
9916
     *
9917
     * @psalm-pure
9918
     *
9919
     * @return string
9920
     *                <p>A UTF-8 encoded string.</p>
9921
     */
9922 4
    public static function string($intOrHex): string
    {
        if ($intOrHex === []) {
            return '';
        }

        // accept a single code point as well as a list of code points
        $code_points = \is_array($intOrHex) ? $intOrHex : [$intOrHex];

        // build one decimal numeric character reference ("&#NNN;") per code point
        // NOTE(review): the (int) cast yields 0 for strings that are not decimal numbers
        // (e.g. 'F6'), so hexadecimal strings are not converted by this code - confirm intent.
        $entities = \implode(
            '',
            \array_map(
                static function ($code_point) {
                    return '&#' . (int) $code_point . ';';
                },
                $code_points
            )
        );

        // let the entity decoder turn the references into the actual characters
        return self::html_entity_decode($entities, \ENT_QUOTES | \ENT_HTML5);
    }
9939
9940
    /**
9941
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
9942
     *
9943
     * EXAMPLE: <code>UTF8::string_has_bom("\xef\xbb\xbf foobar"); // true</code>
9944
     *
9945
     * @param string $str <p>The input string.</p>
9946
     *
9947
     * @psalm-pure
9948
     *
9949
     * @return bool
9950
     *              <p>
9951
     *              <strong>true</strong> if the string has BOM at the start,<br>
9952
     *              <strong>false</strong> otherwise
9953
     *              </p>
9954
     */
9955 6
    public static function string_has_bom(string $str): bool
    {
        // Iterate by value: the BOM table (BOM bytes => byte length) is only read here.
        // A by-reference loop over the shared static array is unnecessary and would
        // leave a reference into that array behind.
        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            // binary-safe comparison of the first $bom_byte_length bytes
            if (\strncmp($str, $bom_string, $bom_byte_length) === 0) {
                return true;
            }
        }

        return false;
    }
9966
9967
    /**
9968
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
9969
     *
9970
     * EXAMPLE: <code>UTF8::strip_tags("<span>κόσμε\xa0\xa1</span>"); // 'κόσμε'</code>
9971
     *
9972
     * @see http://php.net/manual/en/function.strip-tags.php
9973
     *
9974
     * @param string      $str            <p>
9975
     *                                    The input string.
9976
     *                                    </p>
9977
     * @param string|null $allowable_tags [optional] <p>
9978
     *                                    You can use the optional second parameter to specify tags which should
9979
     *                                    not be stripped.
9980
     *                                    </p>
9981
     *                                    <p>
9982
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
9983
     *                                    can not be changed with allowable_tags.
9984
     *                                    </p>
9985
     * @param bool        $clean_utf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
9986
     *
9987
     * @psalm-pure
9988
     *
9989
     * @return string
9990
     *                <p>The stripped string.</p>
9991
     */
9992 4
    public static function strip_tags(
        string $str,
        string $allowable_tags = null,
        bool $clean_utf8 = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // only forward the list of allowed tags when the caller supplied one
        return $allowable_tags === null
            ? \strip_tags($str)
            : \strip_tags($str, $allowable_tags);
    }
10011
10012
    /**
10013
     * Strip all whitespace characters. This includes tabs and newline
10014
     * characters, as well as multibyte whitespace such as the thin space
10015
     * and ideographic space.
10016
     *
10017
     * EXAMPLE: <code>UTF8::strip_whitespace('   Ο     συγγραφέας  '); // 'Οσυγγραφέας'</code>
10018
     *
10019
     * @param string $str
10020
     *
10021
     * @psalm-pure
10022
     *
10023
     * @return string
10024
     */
10025 36
    public static function strip_whitespace(string $str): string
    {
        if ($str !== '') {
            // remove every run of whitespace; the "u" modifier makes the pattern UTF-8 aware
            $str = (string) \preg_replace('/[[:space:]]+/u', '', $str);
        }

        return $str;
    }
10033
10034
    /**
10035
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
10036
     *
10037
     * INFO: use UTF8::stripos_in_byte() for the byte-length
10038
     *
10039
     * EXAMPLE: <code>UTF8::stripos('aσσb', 'ΣΣ'); // 1</code> (σσ == ΣΣ)
10040
     *
10041
     * @see http://php.net/manual/en/function.mb-stripos.php
10042
     *
10043
     * @param string $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
10044
     * @param string $needle     <p>The string to find in haystack.</p>
10045
     * @param int    $offset     [optional] <p>The position in haystack to start searching.</p>
10046
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
10047
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10048
     *
10049
     * @psalm-pure
10050
     *
10051
     * @return false|int
10052
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
10053
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
10054
     */
10055 25
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $is_php8 = \PHP_VERSION_ID >= 80000;

        if ($haystack === '') {
            // on PHP >= 8.0 an empty needle is reported at position 0 of an empty haystack
            return ($is_php8 && $needle === '') ? 0 : false;
        }

        // before PHP 8.0 an empty needle is never found
        if (!$is_php8 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stripos($haystack, $needle, $offset, $encoding);
            }

            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        //
        // fallback via intl
        //

        // INFO: "grapheme_stripos()" can't handle other encodings or a negative offset
        $grapheme_usable = $encoding === 'UTF-8'
                           &&
                           $offset >= 0
                           &&
                           self::$SUPPORT['intl'] === true;
        if ($grapheme_usable) {
            $grapheme_position = \grapheme_stripos($haystack, $needle, $offset);
            if ($grapheme_position !== false) {
                return $grapheme_position;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both strings, then search case-sensitively
        //

        return self::strpos(
            self::strtocasefold($haystack, true, false, $encoding, null, false),
            self::strtocasefold($needle, true, false, $encoding, null, false),
            $offset,
            $encoding
        );
    }
10123
10124
    /**
10125
     * Returns all of haystack starting from and including the first occurrence of needle to the end.
10126
     *
10127
     * EXAMPLE: <code>
10128
     * $str = 'iñtërnâtiônàlizætiøn';
10129
     * $search = 'NÂT';
10130
     *
10131
     * UTF8::stristr($str, $search); // 'nâtiônàlizætiøn'
10132
     * UTF8::stristr($str, $search, true); // 'iñtër'
10133
     * </code>
10134
     *
10135
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
10136
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
10137
     * @param bool   $before_needle [optional] <p>
10138
     *                              If <b>TRUE</b>, it returns the part of the
10139
     *                              haystack before the first occurrence of the needle (excluding the needle).
10140
     *                              </p>
10141
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
10142
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
10143
     *
10144
     * @psalm-pure
10145
     *
10146
     * @return false|string
10147
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
10148
     */
10149 13
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            // on PHP >= 8.0 an empty needle matches an empty haystack
            return (\PHP_VERSION_ID >= 80000 && $needle === '') ? '' : false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($needle === '') {
            // on PHP >= 8.0 an empty needle matches at the very start
            return \PHP_VERSION_ID >= 80000 ? $haystack : false;
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stristr($haystack, $needle, $before_needle, $encoding);
            }

            return \mb_stristr($haystack, $needle, $before_needle);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $is_utf8 = $encoding === 'UTF-8';

        if (
            !$is_utf8
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_stristr()" can't handle other encodings
        if ($is_utf8 && self::$SUPPORT['intl'] === true) {
            $grapheme_result = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($grapheme_result !== false) {
                return $grapheme_result;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php: lazily capture everything in front of the first match
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // the match starts right after the captured prefix
        $prefix_length = (int) self::strlen($match[1], $encoding);

        return self::substr($haystack, $prefix_length, null, $encoding);
    }
10229
10230
    /**
10231
     * Get the string length, not the byte-length!
10232
     *
10233
     * INFO: use UTF8::strwidth() for the char-length
10234
     *
10235
     * EXAMPLE: <code>UTF8::strlen("Iñtërnâtiôn\xE9àlizætiøn"); // 20</code>
10236
     *
10237
     * @see http://php.net/manual/en/function.mb-strlen.php
10238
     *
10239
     * @param string $str        <p>The string being checked for length.</p>
10240
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
10241
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10242
     *
10243
     * @psalm-pure
10244
     *
10245
     * @return false|int
10246
     *                   <p>
10247
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
10248
     *                   $encoding.
10249
     *                   (One multi-byte character counted as +1).
10250
     *                   <br>
10251
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
10252
     *                   chars.
10253
     *                   </p>
10254
     */
10255 174
    public static function strlen(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
            return $encoding === 'UTF-8'
                ? @\mb_strlen($str)
                : @\mb_strlen($str, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strlen($str);
        }

        $is_utf8 = $encoding === 'UTF-8';

        if (
            !$is_utf8
            &&
            self::$SUPPORT['mbstring'] === false
            &&
            self::$SUPPORT['iconv'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconv_length = \iconv_strlen($str, $encoding);
            if ($iconv_length !== false) {
                return $iconv_length;
            }
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strlen()" can't handle other encodings
        if ($is_utf8 && self::$SUPPORT['intl'] === true) {
            $grapheme_length = \grapheme_strlen($str);
            if ($grapheme_length !== null) {
                return $grapheme_length;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php: count the matches of "any single character"
        //

        \preg_match_all('/./us', $str, $parts);
        $char_count = \count($parts[0]);

        // a non-empty string without a single match could not be processed
        return $char_count === 0 ? false : $char_count;
    }
10360
10361
    /**
10362
     * Get string length in byte.
10363
     *
10364
     * @param string $str
10365
     *
10366
     * @psalm-pure
10367
     *
10368
     * @return int
10369
     */
10370 1
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // "mb_" is available if overload is used, so use it with an 8-bit encoding ...
        // ... otherwise the plain byte-wise strlen() is used
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850')
            : \strlen($str);
    }
10383
10384
    /**
10385
     * Case-insensitive string comparisons using a "natural order" algorithm.
10386
     *
10387
     * INFO: natural order version of UTF8::strcasecmp()
10388
     *
10389
     * EXAMPLES: <code>
10390
     * UTF8::strnatcasecmp('2', '10Hello WORLD 中文空白!'); // -1
10391
     * UTF8::strcasecmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
10392
     *
10393
     * UTF8::strnatcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
10394
     * UTF8::strcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
10395
     * </code>
10396
     *
10397
     * @param string $str1     <p>The first string.</p>
10398
     * @param string $str2     <p>The second string.</p>
10399
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
10400
     *
10401
     * @psalm-pure
10402
     *
10403
     * @return int
10404
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
10405
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
10406
     *             <strong>0</strong> if they are equal
10407
     */
10408 2
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        // fold both operands with identical options, then compare in natural order
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
10415
10416
    /**
     * Compare two strings with a "natural order" algorithm (case-sensitive).
     *
     * INFO: natural order version of UTF8::strcmp()
     *
     * EXAMPLES: <code>
     * UTF8::strnatcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // -1
     * UTF8::strcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
     *
     * UTF8::strnatcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
     * UTF8::strcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
     * </code>
     *
     * @see http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        // byte-identical input is equal by definition, no folding required
        if ($str1 === $str2) {
            return 0;
        }

        $left = (string) self::strtonatfold($str1);
        $right = (string) self::strtonatfold($str2);

        return \strnatcmp($left, $right);
    }
10452
10453
    /**
     * Case-insensitive string comparison of the first n characters.
     *
     * EXAMPLE: <code>
     * UTF8::strncasecmp("iñtërnâtiôn\nàlizætiøn321", "iñtërnâtiôn\nàlizætiøn123", 5); // 0
     * </code>
     *
     * @see http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // Both operands are case-folded first; the comparison of the leading
        // $len characters is then delegated to the case-sensitive variant.
        // The encoding is forwarded as well, so that the truncation counts
        // characters in the same charset that was used for the case folding.
        return self::strncmp(
            self::strtocasefold($str1, true, false, $encoding, null, false),
            self::strtocasefold($str2, true, false, $encoding, null, false),
            $len,
            $encoding
        );
    }
10486
10487
    /**
     * Compare the first n characters of two strings (case-sensitive).
     *
     * EXAMPLE: <code>
     * UTF8::strncmp("Iñtërnâtiôn\nàlizætiøn321", "Iñtërnâtiôn\nàlizætiøn123", 5); // 0
     * </code>
     *
     * @see http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // the two most common encodings are used as-is, everything else gets normalized
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // other encodings: cut both strings via the encoding-aware helper
        if ($encoding !== 'UTF-8') {
            return self::strcmp(
                (string) self::substr($str1, 0, $len, $encoding),
                (string) self::substr($str2, 0, $len, $encoding)
            );
        }

        // UTF-8: cut both strings directly via mbstring
        return self::strcmp(
            (string) \mb_substr($str1, 0, $len),
            (string) \mb_substr($str2, 0, $len)
        );
    }
10528
10529
    /**
     * Search a string for the first occurrence of any character of a given set.
     *
     * EXAMPLE: <code>UTF8::strpbrk('-中文空白-', '白'); // '白-'</code>
     *
     * @see http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>This parameter is case-sensitive.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The string starting from the character found, or false if it is not found.</p>
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        // build a character class out of the list and look for its first hit
        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $matches)) {
            return false;
        }

        // locate the matched character in the haystack and return everything from there on
        $start = (int) \strpos($haystack, $matches[0]);

        return \substr($haystack, $start);
    }
10556
10557
    /**
     * Find the position of the first occurrence of a substring in a string.
     *
     * INFO: use UTF8::strpos_in_byte() for the byte-length
     *
     * EXAMPLE: <code>UTF8::strpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 8</code>
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>The search offset. If it is not specified, 0 is used.
     *                               A negative offset counts from the end of the string.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string.<br> If needle is not found it returns false.
     */
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // fast path for an empty haystack, evaluated before the needle is coerced
        if ($haystack === '') {
            if (\PHP_VERSION_ID >= 80000) {
                if ($needle === '') {
                    return 0;
                }
            } else {
                return false;
            }
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($haystack === '') {
            if (\PHP_VERSION_ID >= 80000 && $needle === '') {
                return 0;
            }

            return false;
        }

        if ($needle === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
                return @\mb_strpos($haystack, $needle, $offset);
            }

            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //
        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $return_tmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Absolute character index at which the searched slice starts.
        // A negative offset counts from the end of the haystack, so it has to be
        // translated before the haystack is cut; otherwise the returned position
        // would be relative to the slice instead of the full haystack.
        $start = $offset;
        if ($start < 0) {
            $start += (int) self::strlen($haystack, $encoding);
            if ($start < 0) {
                $start = 0;
            }
        }

        $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystack_tmp === false) {
            $haystack_tmp = '';
        }
        $haystack = (string) $haystack_tmp;

        // byte-wise search inside the slice ...
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        // ... then convert the byte position into a character position
        if ($pos) {
            return $start + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        return $start;
    }
10730
10731
    /**
     * Find the byte position of the first occurrence of a substring in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found, it returns false.</p>
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strpos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset ...
        return \mb_strpos($haystack, $needle, $offset, 'CP850');
    }
10763
10764
    /**
     * Find the byte position of the first occurrence of a substring in a string, case-insensitive.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found, it returns false.</p>
     */
    public static function stripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \stripos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset ...
        return \mb_stripos($haystack, $needle, $offset, 'CP850');
    }
10796
10797
    /**
     * Find the last occurrence of a character in a string within another.
     *
     * EXAMPLE: <code>UTF8::strrchr('κόσμεκόσμε-äöü', 'κόσμε'); // 'κόσμε-äöü'</code>
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack or false if needle is not found.</p>
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        $is_single_byte_encoding = $encoding === 'CP850' || $encoding === 'ASCII';
        if (!$before_needle && $is_single_byte_encoding) {
            return \strrchr($haystack, $needle);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv || vanilla php
        //

        // both fallbacks only search for the first character of the needle
        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }
        $needle = $first_char;

        // the two fallbacks differ only in how the last position is determined
        if (self::$SUPPORT['iconv'] === true) {
            $last_pos = \iconv_strrpos($haystack, $needle, $encoding);
        } else {
            $last_pos = self::strrpos($haystack, $needle, 0, $encoding);
        }

        if ($last_pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
10927
10928
    /**
     * Reverse the order of the characters in a string.
     *
     * EXAMPLE: <code>UTF8::strrev('κ-öäü'); // 'üäö-κ'</code>
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with characters in the reverse sequence.</p>
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // the string is passed through emoji_encode() before the reversal
        // and through emoji_decode() afterwards
        $str = self::emoji_encode($str, true);
        $result = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            for ($index = (int) self::strlen($str, $encoding) - 1; $index >= 0; --$index) {
                $char = self::substr($str, $index, 1, $encoding);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($index = (int) \grapheme_strlen($str) - 1; $index >= 0; --$index) {
                $char = \grapheme_substr($str, $index, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } else {
            for ($index = (int) \mb_strlen($str) - 1; $index >= 0; --$index) {
                $char = \mb_substr($str, $index, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        }

        return self::emoji_decode($result, true);
    }
10985
10986
    /**
     * Find the last occurrence of a character in a string within another, case-insensitive.
     *
     * EXAMPLE: <code>UTF8::strrichr('Aκόσμεκόσμε-äöü', 'aκόσμε'); // 'Aκόσμεκόσμε-äöü'</code>
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack or<br>false if needle is not found.</p>
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // the fallback only searches for the first character of the needle
        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }
        $needle = $first_char;

        $last_pos = self::strripos($haystack, $needle, 0, $encoding);
        if ($last_pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
11066
11067
    /**
     * Find the position of the last occurrence of a substring in a string, case-insensitive.
     *
     * EXAMPLE: <code>UTF8::strripos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * @param string     $haystack   <p>The string to look in.</p>
     * @param int|string $needle     <p>The string to look for.</p>
     * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // fast path for an empty haystack, evaluated before the needle is coerced
        if ($haystack === '') {
            if (\PHP_VERSION_ID < 80000) {
                return false;
            }

            if ($needle === '') {
                return 0;
            }
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($haystack === '') {
            return (\PHP_VERSION_ID >= 80000 && $needle === '') ? 0 : false;
        }

        if ($needle === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        if ($clean_utf8) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strripos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strripos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $intl_pos = \grapheme_strripos($haystack, $needle, $offset);
            if ($intl_pos !== false) {
                return $intl_pos;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // case-fold both strings and delegate to the case-sensitive search
        return self::strrpos(
            self::strtocasefold($haystack, true, false, $encoding),
            self::strtocasefold($needle, true, false, $encoding),
            $offset,
            $encoding,
            $clean_utf8
        );
    }
11198
11199
    /**
     * Find the byte position of the last occurrence of a string within another, case-insensitive.
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>Return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found.</p>
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strripos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset ...
        return \mb_strripos($haystack, $needle, $offset, 'CP850');
    }
11233
11234
    /**
11235
     * Find the position of the last occurrence of a substring in a string.
11236
     *
11237
     * EXAMPLE: <code>UTF8::strrpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
11238
     *
11239
     * @see http://php.net/manual/en/function.mb-strrpos.php
11240
     *
11241
     * @param string     $haystack   <p>The string being checked, for the last occurrence of needle</p>
11242
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
11243
     * @param int        $offset     [optional] <p>May be specified to begin searching an arbitrary number of characters
11244
     *                               into the string. Negative values will stop searching at an arbitrary point prior to
11245
     *                               the end of the string.
11246
     *                               </p>
11247
     * @param string     $encoding   [optional] <p>Set the charset.</p>
11248
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
11249
     *
11250
     * @psalm-pure
11251
     *
11252
     * @return false|int
11253
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
11254
     *                   string.<br>If needle is not found, it returns false.</p>
11255
     */
11256 35
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $is_php8 = \PHP_VERSION_ID >= 80000;

        // first check of the empty haystack, done against the raw (not yet casted) needle
        if ($haystack === '') {
            if (!$is_php8) {
                return false;
            }

            if ($needle === '') {
                return 0;
            }
        }

        // iconv and mbstring do not support integer $needle, so convert a code point into its character
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        // second check of the empty haystack, now against the string needle
        if ($haystack === '') {
            return ($is_php8 && $needle === '') ? 0 : false;
        }

        // an empty needle is only accepted from PHP 8.0 on
        if (!$is_php8 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrpos($haystack, $needle, $offset)
                : \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strrpos($haystack, $needle, $offset);
        }

        if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        $intl_usable = $offset >= 0 // grapheme_strrpos() can't handle negative offset
            && $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            && self::$SUPPORT['intl'] === true;

        if ($intl_usable) {
            $grapheme_pos = \grapheme_strrpos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Narrow the haystack to the requested window; only a positive offset
        // has to be added back to the resulting character position.
        $char_shift = 0;
        $window = null;
        if ($offset > 0) {
            $window = self::substr($haystack, $offset);
            $char_shift = $offset;
        } elseif ($offset < 0) {
            $window = self::substr($haystack, 0, $offset);
        }

        if ($window !== null) {
            $haystack = $window === false ? '' : (string) $window;
        }

        $byte_pos = \strrpos($haystack, $needle);
        if ($byte_pos === false) {
            return false;
        }

        /** @var false|string $prefix - needed for PhpStan (stubs error) */
        $prefix = \substr($haystack, 0, $byte_pos);
        if ($prefix === false) {
            return false;
        }

        // the character length of everything before the match is the character position
        return $char_shift + (int) self::strlen($prefix);
    }
11393
11394
    /**
11395
     * Find the position of the last occurrence of a substring in a string.
11396
     *
11397
     * @param string $haystack <p>
11398
     *                         The string being checked, for the last occurrence
11399
     *                         of needle.
11400
     *                         </p>
11401
     * @param string $needle   <p>
11402
     *                         The string to find in haystack.
11403
     *                         </p>
11404
     * @param int    $offset   [optional] <p>May be specified to begin searching an arbitrary number of characters into
11405
     *                         the string. Negative values will stop searching at an arbitrary point
11406
     *                         prior to the end of the string.
11407
     *                         </p>
11408
     *
11409
     * @psalm-pure
11410
     *
11411
     * @return false|int
11412
     *                   <p>The numeric position of the last occurrence of needle in the
11413
     *                   haystack string. If needle is not found, it returns false.</p>
11414
     */
11415 2
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // An empty haystack or an empty needle never yields a position.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // No "mbstring.func_overload" in use: call the plain function directly.
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strrpos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit encoding ...
        return \mb_strrpos($haystack, $needle, $offset, 'CP850');
    }
11428
11429
    /**
11430
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
11431
     * mask.
11432
     *
11433
     * EXAMPLE: <code>UTF8::strspn('iñtërnâtiônàlizætiøn', 'itñ'); // 3</code>
11434
     *
11435
     * @param string   $str      <p>The input string.</p>
11436
     * @param string   $mask     <p>The mask of chars</p>
11437
     * @param int      $offset   [optional]
11438
     * @param int|null $length   [optional]
11439
     * @param string   $encoding [optional] <p>Set the charset.</p>
11440
     *
11441
     * @psalm-pure
11442
     *
11443
     * @return false|int
11444
     */
11445 10
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Reduce the input to the requested window before matching.
        if ($offset || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $str = (string) \mb_substr($str, $offset, $length);
            } else {
                $str = (string) \mb_substr($str, $offset);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        // Match the leading run of mask characters and measure it.
        $found = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
11476
11477
    /**
11478
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
11479
     *
11480
     * EXAMPLE: <code>
11481
     * $str = 'iñtërnâtiônàlizætiøn';
11482
     * $search = 'nât';
11483
     *
11484
     * UTF8::strstr($str, $search); // 'nâtiônàlizætiøn'
11485
     * UTF8::strstr($str, $search, true); // 'iñtër'
11486
     * </code>
11487
     *
11488
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
11489
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
11490
     * @param bool   $before_needle [optional] <p>
11491
     *                              If <b>TRUE</b>, strstr() returns the part of the
11492
     *                              haystack before the first occurrence of the needle (excluding the needle).
11493
     *                              </p>
11494
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
11495
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
11496
     *
11497
     * @psalm-pure
11498
     *
11499
     * @return false|string
11500
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
11501
     */
11502 3
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $is_php8 = \PHP_VERSION_ID >= 80000;

        // empty haystack: only PHP >= 8.0 with an empty needle gives a (empty) string
        if ($haystack === '') {
            return ($is_php8 && $needle === '') ? '' : false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // empty needle: PHP >= 8.0 gives back the whole haystack, older versions false
        if ($needle === '') {
            return $is_php8 ? $haystack : false;
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strstr()" can't handle other encodings
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $grapheme_result = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($grapheme_result !== false) {
                return $grapheme_result;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // capture (non-greedy) everything in front of the first occurrence of the needle
        $parts = [];
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $parts);

        if (!isset($parts[1])) {
            return false;
        }

        $before = $parts[1];

        return $before_needle
            ? $before
            : self::substr($haystack, (int) self::strlen($before));
    }
11610
11611
    /**
11612
     * Finds first occurrence of a string within another.
11613
     *
11614
     * @param string $haystack      <p>
11615
     *                              The string from which to get the first occurrence
11616
     *                              of needle.
11617
     *                              </p>
11618
     * @param string $needle        <p>
11619
     *                              The string to find in haystack.
11620
     *                              </p>
11621
     * @param bool   $before_needle [optional] <p>
11622
     *                              Determines which portion of haystack
11623
     *                              this function returns.
11624
     *                              If set to true, it returns all of haystack
11625
     *                              from the beginning to the first occurrence of needle.
11626
     *                              If set to false, it returns all of haystack
11627
     *                              from the first occurrence of needle to the end,
11628
     *                              </p>
11629
     *
11630
     * @psalm-pure
11631
     *
11632
     * @return false|string
11633
     *                      <p>The portion of haystack,
11634
     *                      or false if needle is not found.</p>
11635
     */
11636 2
    public static function strstr_in_byte(
        string $haystack,
        string $needle,
        bool $before_needle = false
    ) {
        // An empty haystack or an empty needle never yields a result.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // No "mbstring.func_overload" in use: call the plain function directly.
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strstr($haystack, $needle, $before_needle);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit encoding ...
        return \mb_strstr($haystack, $needle, $before_needle, 'CP850');
    }
11652
11653
    /**
11654
     * Unicode transformation for case-less matching.
11655
     *
11656
     * EXAMPLE: <code>UTF8::strtocasefold('ǰ◌̱'); // 'ǰ◌̱'</code>
11657
     *
11658
     * @see http://unicode.org/reports/tr21/tr21-5.html
11659
     *
11660
     * @param string      $str        <p>The input string.</p>
11661
     * @param bool        $full       [optional] <p>
11662
     *                                <b>true</b>, replace full case folding chars (default)<br>
11663
     *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
11664
     *                                </p>
11665
     * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
11666
     * @param string      $encoding   [optional] <p>Set the charset.</p>
11667
     * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
11668
     * @param bool        $lower      [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
11669
     *                                is for some languages better ...</p>
11670
     *
11671
     * @psalm-pure
11672
     *
11673
     * @return string
11674
     */
11675 32
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $clean_utf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        bool $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove non UTF-8 chars before the case conversion
            $str = self::clean($str);
        }

        // apply the case-folding replacements first, then convert the case of the result
        $folded = self::fixStrCaseHelper($str, $lower, $full);

        // without a language and with plain UTF-8 the "mb_" functions are called directly
        if ($lang === null && $encoding === 'UTF-8') {
            return $lower ? \mb_strtolower($folded) : \mb_strtoupper($folded);
        }

        return $lower
            ? self::strtolower($folded, $encoding, false, $lang)
            : self::strtoupper($folded, $encoding, false, $lang);
    }
11709
11710
    /**
11711
     * Make a string lowercase.
11712
     *
11713
     * EXAMPLE: <code>UTF8::strtolower('DÉJÀ Σσς Iıİi'); // 'déjà σσς iıii'</code>
11714
     *
11715
     * @see http://php.net/manual/en/function.mb-strtolower.php
11716
     *
11717
     * @param string      $str                           <p>The string being lowercased.</p>
11718
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
11719
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
11720
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
11721
     *                                                   tr</p>
11722
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
11723
     *                                                   -> ß</p>
11724
     *
11725
     * @psalm-pure
11726
     *
11727
     * @return string
11728
     *                <p>String with all alphabetic characters converted to lowercase.</p>
11729
     */
11730 73
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove non UTF-8 chars before the case conversion
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // fast path: no language and plain UTF-8
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // no language requested: always fallback via symfony polyfill
        if ($lang === null) {
            return \mb_strtolower($str, $encoding);
        }

        // a language was requested, but it can only be handled via intl
        if (self::$SUPPORT['intl'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            return \mb_strtolower($str, $encoding);
        }

        // load the list of transliterator ids only once
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $transliterator_id = $lang . '-Lower';
        if (!\in_array($transliterator_id, self::$INTL_TRANSLITERATOR_LIST, true)) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

            // unknown language: use the language independent transliterator instead
            $transliterator_id = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        return (string) \transliterator_transliterate($transliterator_id, $str);
    }
11791
11792
    /**
11793
     * Make a string uppercase.
11794
     *
11795
     * EXAMPLE: <code>UTF8::strtoupper('Déjà Σσς Iıİi'); // 'DÉJÀ ΣΣΣ IIİI'</code>
11796
     *
11797
     * @see http://php.net/manual/en/function.mb-strtoupper.php
11798
     *
11799
     * @param string      $str                           <p>The string being uppercased.</p>
11800
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
11801
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
11802
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
11803
     *                                                   tr</p>
11804
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
11805
     *                                                   -> ß</p>
11806
     *
11807
     * @psalm-pure
11808
     *
11809
     * @return string
11810
     *                <p>String with all alphabetic characters converted to uppercase.</p>
11811
     */
11812 17
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove non UTF-8 chars before the case conversion
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str);
        }

        // fast path: no language and plain UTF-8
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // load the list of transliterator ids only once
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Upper';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl is available here, only the requested language is unknown,
                    // so report it the same way as UTF8::strtolower() does
                    /**
                     * @psalm-suppress ImpureFunctionCall - it is only a warning
                     */
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    // unknown language: use the language independent transliterator instead
                    $language_code = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($language_code, $str);
            }

            // the warning has to name this method (it named "strtolower" before)
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang"-parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
11873
11874
    /**
11875
     * Translate characters or replace sub-strings.
11876
     *
11877
     * EXAMPLE:
11878
     * <code>
11879
     * $array = [
11880
     *     'Hello'   => '○●◎',
11881
     *     '中文空白' => 'earth',
11882
     * ];
11883
     * UTF8::strtr('Hello 中文空白', $array); // '○●◎ earth'
11884
     * </code>
11885
     *
11886
     * @see http://php.net/manual/en/function.strtr.php
11887
     *
11888
     * @param string          $str  <p>The string being translated.</p>
11889
     * @param string|string[] $from <p>The string replacing from.</p>
11890
     * @param string|string[] $to   [optional] <p>The string being translated to.</p>
11891
     *
11892
     * @psalm-pure
11893
     *
11894
     * @return string
11895
     *                <p>This function returns a copy of str, translating all occurrences of each character in "from"
11896
     *                to the corresponding character in "to".</p>
11897
     */
11898 2
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // nothing to translate if source and target are identical
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            // split plain strings into their characters, so that both sides are lists
            if (!\is_array($from)) {
                $from = self::str_split($from);
            }

            if (!\is_array($to)) {
                $to = self::str_split($to);
            }

            // cut the longer list down, so that every "from" entry has exactly one "to" entry
            $count_from = \count($from);
            $count_to = \count($to);

            if ($count_from > $count_to) {
                $from = \array_slice($from, 0, $count_to);
            } elseif ($count_from < $count_to) {
                $to = \array_slice($to, 0, $count_from);
            }

            // keep the result in its own variable, so that the error message below
            // can still show the original "from" (it showed the failed result before)
            $pairs = \array_combine($from, $to);
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            if ($pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from, true) . ' | to: ' . \print_r($to, true) . ')');
            }

            $from = $pairs;
        }

        // only reached with a string if "to" is the empty string
        if (\is_string($from)) {
            return \str_replace($from, $to, $str);
        }

        return \strtr($str, $from);
    }
11941
11942
    /**
11943
     * Return the width of a string.
11944
     *
11945
     * INFO: use UTF8::strlen() for the byte-length
11946
     *
11947
     * EXAMPLE: <code>UTF8::strwidth("Iñtërnâtiôn\xE9àlizætiøn"); // 21</code>
11948
     *
11949
     * @param string $str        <p>The input string.</p>
11950
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
11951
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
11952
     *
11953
     * @psalm-pure
11954
     *
11955
     * @return int
11956
     */
11957 2
    public static function strwidth(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): int {
        if ($str === '') {
            return 0;
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        //
        // fallback via vanilla php
        //

        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip all chars from the listed ranges and count them: each of them is counted twice,
        // each remaining char is counted once
        $wide_count = 0;
        $remaining = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wide_count);

        return (2 * $wide_count) + (int) self::strlen($remaining);
    }
12001
12002
    /**
     * Get part of a string.
     *
     * EXAMPLE: <code>UTF8::substr('中文空白', 1, 2); // '文空'</code>
     *
     * @see http://php.net/manual/en/function.mb-substr.php
     *
     * @param string   $str        <p>The string being checked.</p>
     * @param int      $offset     <p>The first position used in str.</p>
     * @param int|null $length     [optional] <p>The maximum length of the returned string.</p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters. An empty string is returned for an empty
     *                      input, for a <i>length</i> of 0 and (in the fallback code) for an
     *                      <i>offset</i> behind the end of the string. <b>FALSE</b> is returned
     *                      if the string length cannot be determined (e.g. invalid chars without
     *                      mbstring) or if the underlying PHP function fails.</p>
     */
    public static function substr(
        string $str,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fast path via mbstring (UTF-8)
        //

        if (self::$SUPPORT['mbstring'] === true && $encoding === 'UTF-8') {
            if ($length === null) {
                return \mb_substr($str, $offset);
            }

            return \mb_substr($str, $offset, $length);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            if ($length === null) {
                return \substr($str, $offset);
            }

            return \substr($str, $offset, $length);
        }

        //
        // any other encoding via mbstring
        //
        // Without this branch the encoding was never handed to mbstring and the
        // string was sliced by the encoding-unaware fallbacks below.
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_substr($str, $offset, $length, $encoding);
        }

        // INFO: from here on mbstring is not available

        // otherwise we need the string-length
        $str_length = 0;
        if ($offset || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // empty string: the offset points exactly behind the last character
        // ("$length === 0" was already handled above, so only "null" is left)
        if ($offset === $str_length && $length === null) {
            return '';
        }

        // impossible
        if ($offset && $offset > $str_length) {
            return '';
        }

        $length = $length ?? $str_length;

        if ($encoding !== 'UTF-8') {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_substr($str, $offset, $length);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $return_tmp = \iconv_substr($str, $offset, $length);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split to array, and remove invalid characters
        // &&
        // extract relevant part, and join to make string again
        return \implode('', \array_slice(self::str_split($str), $offset, $length));
    }
12160
12161
    /**
     * Binary-safe comparison of two strings from an offset, up to a length of characters.
     *
     * If an offset or a length is given, <i>str1</i> is reduced to the requested part and
     * <i>str2</i> is cut to the same number of characters before both are compared.
     *
     * EXAMPLE: <code>
     * UTF8::substr_compare("○●◎\r", '●◎', 0, 2); // -1
     * UTF8::substr_compare("○●◎\r", '◎●', 1, 2); // 1
     * UTF8::substr_compare("○●◎\r", '●◎', 1, 2); // 0
     * </code>
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
     *                                     of the length of the str compared to the length of main_str less the
     *                                     offset.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);
                // measure the slice in its own encoding, otherwise $str2 is cut
                // to a wrong number of characters for non UTF-8 input
                $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1, $encoding), $encoding);
            }
        }

        if ($case_insensitivity) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
12222
12223
    /**
     * Count the number of substring occurrences.
     *
     * EXAMPLE: <code>UTF8::substr_count('中文空白', '文空', 1, 2); // 1</code>
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack   <p>The string to search in.</p>
     * @param string   $needle     <p>The substring to search for.</p>
     * @param int      $offset     [optional] <p>The offset where to start counting.</p>
     * @param int|null $length     [optional] <p>
     *                             The maximum length after the specified offset to search for the
     *                             substring. A negative value is handed to "mb_substr()" and is
     *                             therefore counted from the end of the haystack.
     *                             </p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of occurrences; 0 for an empty haystack or a length of 0.
     *                   <b>FALSE</b> is returned for an empty needle, or if the haystack length
     *                   cannot be determined.</p>
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '') {
            return false;
        }

        // nothing to search in
        if ($haystack === '' || $length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // INFO: "$length === 0" was handled above, so "!== null" also covers a
        //       negative length, which was silently ignored for "$offset === 0"
        if ($offset || $length !== null) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack, $encoding);
                if ($length_tmp === false) {
                    return false;
                }
                $length = $length_tmp;
            }

            if ($encoding === 'UTF-8') {
                $haystack = (string) \mb_substr($haystack, $offset, $length);
            } else {
                $haystack = (string) \mb_substr($haystack, $offset, $length, $encoding);
            }
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_substr_count($haystack, $needle);
            }

            return \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback via vanilla php: count the (non-overlapping) regex matches
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
12320
12321
    /**
     * Count the number of substring occurrences, processed in bytes.
     *
     * @param string   $haystack <p>
     *                           The string being checked.
     *                           </p>
     * @param string   $needle   <p>
     *                           The string being found.
     *                           </p>
     * @param int      $offset   [optional] <p>
     *                           The offset where to start counting
     *                           </p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring. It outputs a warning if the offset plus the length is
     *                           greater than the haystack length.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of times the
     *                   needle substring occurs in the
     *                   haystack string; 0 if the haystack or the needle is empty.</p>
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        if (
            ($offset || $length !== null)
            &&
            self::$SUPPORT['mbstring_func_overload'] === true
        ) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack);
                if ($length_tmp === false) {
                    return false;
                }
                $length = $length_tmp;
            }

            if (
                (
                    $length !== 0
                    &&
                    $offset !== 0
                )
                &&
                ($length + $offset) <= 0
                &&
                // output from "substr_count()" have changed in PHP 7.1
                // INFO: PHP 7.1.0 is "70100" ("71000" matched every PHP 7.x release)
                \PHP_VERSION_ID < 70100
            ) {
                return false;
            }

            /** @var false|string $haystack_tmp - needed for PhpStan (stubs error) */
            $haystack_tmp = \substr($haystack, $offset, $length);
            if ($haystack_tmp === false) {
                $haystack_tmp = '';
            }
            $haystack = (string) $haystack_tmp;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
        }

        if ($length === null) {
            return \substr_count($haystack, $needle, $offset);
        }

        return \substr_count($haystack, $needle, $offset, $length);
    }
12402
12403
    /**
     * Count how often $substring occurs in the given string.
     *
     * The comparison is case-sensitive by default. With $case_sensitive set to false
     * both strings are converted before counting: via "mb_strtoupper()" for UTF-8 and
     * via UTF8::strtocasefold() for every other encoding.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $substring      <p>The substring to search for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The number of occurrences, 0 if one of the strings is empty.</p>
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $case_sensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($str === '' || $substring === '') {
            return 0;
        }

        if ($encoding === 'UTF-8') {
            if (!$case_sensitive) {
                $str = \mb_strtoupper($str);
                $substring = \mb_strtoupper($substring);
            }

            return (int) \mb_substr_count($str, $substring);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (!$case_sensitive) {
            $str = self::strtocasefold($str, true, false, $encoding, null, false);
            $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
        }

        return (int) \mb_substr_count($str, $substring, $encoding);
    }
12450
12451
    /**
     * Strip a prefix ($needle) from the start of the string ($haystack), ignoring case.
     *
     * EXAMPLE: <code>
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'κόσμε'); // 'MiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack
     *                if it does not start with the needle.</p>
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // nothing to strip from, or nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_istarts_with($haystack, $needle)) {
            return $haystack;
        }

        $needle_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $needle_length);
    }
12483
12484
    /**
     * Get part of a string, processed in bytes.
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first (byte) position used in str.</p>
     * @param int|null $length [optional] <p>The maximum (byte) length of the returned string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters; an empty string for an empty input or a
     *                      <i>length</i> of 0. Otherwise whatever the underlying "substr()" /
     *                      "mb_substr()" call returns.</p>
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // nothing to extract
        if ($str === '' || $length === 0) {
            return '';
        }

        // no offset and no limit: the whole string is requested
        if ($offset === 0 && $length === null) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            // "null" is replaced by a very large length, so that the rest of the string is used
            return \substr($str, $offset, $length ?? 2147483647);
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
    }
12517
12518
    /**
     * Strip a suffix ($needle) from the end of the string ($haystack), ignoring case.
     *
     * EXAMPLE: <code>
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddle'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack
     *                if it does not end with the needle.</p>
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // nothing to strip from, or nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_iends_with($haystack, $needle)) {
            return $haystack;
        }

        // keep everything in front of the suffix
        $haystack_length = (int) self::strlen($haystack);
        $needle_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $haystack_length - $needle_length);
    }
12550
12551
    /**
     * Strip a prefix ($needle) from the start of the string ($haystack), case-sensitive.
     *
     * EXAMPLE: <code>
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'κόσμε'); // 'ΚόσμεMiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack
     *                if it does not start with the needle.</p>
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // nothing to strip from, or nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_starts_with($haystack, $needle)) {
            return $haystack;
        }

        $needle_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $needle_length);
    }
12583
12584
    /**
     * Replace text within a portion of a string.
     *
     * EXAMPLE: <code>UTF8::substr_replace(array('Iñtërnâtiônàlizætiøn', 'foo'), 'æ', 1); // array('Iæñtërnâtiônàlizætiøn', 'fæoo')</code>
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given and <i>str</i> is a string, the
     *                                     replacing ends at the end of string. If it is not given and <i>str</i>
     *                                     is an array, a length of zero is used for every element. Of course, if
     *                                     length is zero then this function will have the effect of inserting
     *                                     replacement into string at the given start offset.</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \InvalidArgumentException
     *                                   <p>If "$offset" or "$length" is an array while "$str" is a string.</p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>The result string is returned. If string is an array then array is returned.</p>
     *
     * @template TSubstrReplace
     * @phpstan-param TSubstrReplace $str
     * @phpstan-return TSubstrReplace
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str)) {
            $num = \count($str);

            // nothing to process: otherwise the padded (one element) helper arrays
            // below would produce a result for a string that does not exist
            if ($num === 0) {
                return [];
            }

            // the replacement
            if (\is_array($replacement)) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset
            if (\is_array($offset)) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$value_tmp) {
                    $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
                }
                unset($value_tmp);

                // missing offsets are handled like invalid offsets (=> 0),
                // instead of passing "null" into the string processing
                $offset = \array_pad($offset, $num, 0);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length)) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$value_tmp_V2) {
                    $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
                }
                unset($value_tmp_V2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call, the encoding must be passed on to every element
            return \array_map(
                static function ($str_tmp, $replacement_tmp, $offset_tmp, $length_tmp) use ($encoding) {
                    return self::substr_replace($str_tmp, $replacement_tmp, $offset_tmp, $length_tmp, $encoding);
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        // a single string can only be combined with a single replacement
        if (\is_array($replacement)) {
            if ($replacement !== []) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length)) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset)) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into "0 ... string length"
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // a negative length counts from the end of the string
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            // never replace more than what is left behind the offset
            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // head + replacement + tail
            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into characters ...
        \preg_match_all('/./us', $str, $str_matches);
        \preg_match_all('/./us', $replacement, $replacement_matches);

        if ($length === null) {
            $length_tmp = self::strlen($str, $encoding);
            if ($length_tmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = $length_tmp;
        }

        // ... and replace the requested characters
        \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);

        return \implode('', $str_matches[0]);
    }
12744
12745
    /**
12746
     * Removes a suffix ($needle) from the end of the string ($haystack).
12747
     *
12748
     * EXAMPLE: <code>
12749
     * UTF8::substr_right('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
12750
     * UTF8::substr_right('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddleΚόσμε'
12751
     * </code>
12752
     *
12753
     * @param string $haystack <p>The string to search in.</p>
12754
     * @param string $needle   <p>The substring to search for.</p>
12755
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
12756
     *
12757
     * @psalm-pure
12758
     *
12759
     * @return string
12760
     *                <p>Return the sub-string.</p>
12761
     */
12762 2
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing can be stripped from an empty haystack.
        if ($haystack === '') {
            return '';
        }

        // An empty suffix leaves the haystack untouched.
        if ($needle === '') {
            return $haystack;
        }

        // Byte-wise, case-sensitive suffix comparison; evaluated once for both encoding paths.
        $ends_with_needle = \substr($haystack, -\strlen($needle)) === $needle;
        if (!$ends_with_needle) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            // UTF-8 path: plain "mb_*" calls without an explicit encoding argument.
            $keep_length = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keep_length);
        }

        // Any other encoding goes through the class' own strlen()/substr().
        $keep_length = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $keep_length, $encoding);
    }
12794
12795
    /**
12796
     * Returns a case swapped version of the string.
12797
     *
12798
     * EXAMPLE: <code>UTF8::swapCase('déJÀ σσς iıII'); // 'DÉjà ΣΣΣ IIii'</code>
12799
     *
12800
     * @param string $str        <p>The input string.</p>
12801
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
12802
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
12803
     *
12804
     * @psalm-pure
12805
     *
12806
     * @return string
12807
     *                <p>Each character's case swapped.</p>
12808
     */
12809 6
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // drop invalid byte sequences before any case conversion
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            $lower = \mb_strtolower($str);
            $upper = \mb_strtoupper($str);
        } else {
            $lower = self::strtolower($str, $encoding);
            $upper = self::strtoupper($str, $encoding);
        }

        // Byte-wise XOR: where a byte of $str equals the lowercase variant the
        // uppercase byte remains, and vice versa.
        // NOTE(review): string XOR stops at the shortest operand, so this presumably
        // relies on both case variants having the same byte length as $str - confirm.
        return (string) ($lower ^ $upper ^ $str);
    }
12827
12828
    /**
12829
     * Checks whether symfony-polyfills are used.
12830
     *
12831
     * @psalm-pure
12832
     *
12833
     * @return bool
12834
     *              <p><strong>true</strong> if in use, <strong>false</strong> otherwise</p>
12835
     *
12836
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
12837
     */
12838
    public static function symfony_polyfill_used(): bool
    {
        // A polyfill is assumed whenever a function is available although the
        // extension that normally provides it is not loaded.
        $mbstring_polyfilled = !\extension_loaded('mbstring') && \function_exists('mb_strlen');
        $iconv_polyfilled = !\extension_loaded('iconv') && \function_exists('iconv');

        return $mbstring_polyfilled || $iconv_polyfilled;
    }
12855
12856
    /**
12857
     * @param string $str
12858
     * @param int    $tab_length
12859
     *
12860
     * @psalm-pure
12861
     *
12862
     * @return string
12863
     */
12864 6
    public static function tabs_to_spaces(string $str, int $tab_length = 4): string
    {
        // every tab character becomes $tab_length blanks
        $indent = \str_repeat(' ', $tab_length);

        return \str_replace("\t", $indent, $str);
    }
12876
12877
    /**
12878
     * Converts the first character of each word in the string to uppercase
12879
     * and all other chars to lowercase.
12880
     *
12881
     * @param string      $str                           <p>The input string.</p>
12882
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
12883
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
12884
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
12885
     *                                                   tr</p>
12886
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
12887
     *                                                   -> ß</p>
12888
     *
12889
     * @psalm-pure
12890
     *
12891
     * @return string
12892
     *                <p>A string with all characters of $str being title-cased.</p>
12893
     */
12894 5
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            // remove invalid byte sequences before converting the case
            $str = self::clean($str);
        }

        // Language specific rules or length preservation are delegated to str_titleize().
        if ($lang !== null || $try_to_keep_the_string_length) {
            return self::str_titleize(
                $str,
                null,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length,
                false
            );
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_convert_case($str, \MB_CASE_TITLE, $encoding);
        }

        // UTF-8 path: no explicit encoding argument is passed
        return \mb_convert_case($str, \MB_CASE_TITLE);
    }
12931
12932
    /**
12933
     * alias for "UTF8::to_ascii()"
12934
     *
12935
     * @param string $str
12936
     * @param string $subst_chr
12937
     * @param bool   $strict
12938
     *
12939
     * @psalm-pure
12940
     *
12941
     * @return string
12942
     *
12943
     * @see        UTF8::to_ascii()
12944
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
12945
     */
12946 7
    public static function toAscii(
        string $str,
        string $subst_chr = '?',
        bool $strict = false
    ): string {
        // deprecated alias: all arguments are forwarded unchanged
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
12953
12954
    /**
12955
     * alias for "UTF8::to_iso8859()"
12956
     *
12957
     * @param string|string[] $str
12958
     *
12959
     * @psalm-pure
12960
     *
12961
     * @return string|string[]
12962
     *
12963
     * @see        UTF8::to_iso8859()
12964
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
12965
     */
12966 2
    public static function toIso8859($str)
    {
        // deprecated alias: the work is done by to_iso8859()
        $converted = self::to_iso8859($str);

        return $converted;
    }
12970
12971
    /**
12972
     * alias for "UTF8::to_latin1()"
12973
     *
12974
     * @param string|string[] $str
12975
     *
12976
     * @psalm-pure
12977
     *
12978
     * @return string|string[]
12979
     *
12980
     * @see        UTF8::to_iso8859()
12981
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
12982
     */
12983 2
    public static function toLatin1($str)
    {
        // deprecated alias: the work is done by to_iso8859()
        $converted = self::to_iso8859($str);

        return $converted;
    }
12987
12988
    /**
12989
     * alias for "UTF8::to_utf8()"
12990
     *
12991
     * @param string|string[] $str
12992
     *
12993
     * @psalm-pure
12994
     *
12995
     * @return string|string[]
12996
     *
12997
     * @see        UTF8::to_utf8()
12998
     * @deprecated <p>please use "UTF8::to_utf8()"</p>
12999
     */
13000 2
    public static function toUTF8($str)
    {
        // deprecated alias: the work is done by to_utf8()
        $converted = self::to_utf8($str);

        return $converted;
    }
13004
13005
    /**
13006
     * Convert a string into ASCII.
13007
     *
13008
     * EXAMPLE: <code>UTF8::to_ascii('déjà σσς iıii'); // 'deja sss iiii'</code>
13009
     *
13010
     * @param string $str     <p>The input string.</p>
13011
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
13012
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
13013
     *                        performance</p>
13014
     *
13015
     * @psalm-pure
13016
     *
13017
     * @return string
13018
     */
13019 37
    public static function to_ascii(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        // the transliteration itself is implemented in the ASCII helper class
        $ascii = ASCII::to_transliterate($str, $unknown, $strict);

        return $ascii;
    }
13026
13027
    /**
13028
     * @param bool|float|int|string $str
13029
     *
13030
     * @psalm-pure
13031
     *
13032
     * @return bool
13033
     */
13034 25
    public static function to_boolean($str): bool
    {
        // Surrounding whitespace carries no meaning for a boolean keyword, so it is
        // removed before any lookup. Without this, ' false ' or "off\n" missed the
        // keyword map and ended up as "non-empty string" === true.
        $value = \trim((string) $str);

        // empty or whitespace-only input
        if ($value === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $map = [
            'true'  => true,
            '1'     => true,
            'on'    => true,
            'yes'   => true,
            'false' => false,
            '0'     => false,
            'off'   => false,
            'no'    => false,
        ];

        // keywords are matched case-insensitively
        $key = \strtolower($value);
        if (isset($map[$key])) {
            return $map[$key];
        }

        // any other number: only values greater than zero are true
        if (\is_numeric($value)) {
            return ((float) $value) > 0;
        }

        // any remaining non-empty string counts as true
        return true;
    }
13070
13071
    /**
13072
     * Convert given string to safe filename (and keep string case).
13073
     *
13074
     * @param string $str
13075
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
13076
     *                                  simply replaced with hyphen.
13077
     * @param string $fallback_char
13078
     *
13079
     * @psalm-pure
13080
     *
13081
     * @return string
13082
     */
13083 1
    public static function to_filename(
        string $str,
        bool $use_transliterate = false,
        string $fallback_char = '-'
    ): string {
        // the conversion itself is implemented in the ASCII helper class
        $filename = ASCII::to_filename($str, $use_transliterate, $fallback_char);

        return $filename;
    }
13094
13095
    /**
13096
     * Convert a string into "ISO-8859"-encoding (Latin-1).
13097
     *
13098
     * EXAMPLE: <code>UTF8::to_utf8(UTF8::to_iso8859('  -ABC-中文空白-  ')); // '  -ABC-????-  '</code>
13099
     *
13100
     * @param string|string[] $str
13101
     *
13102
     * @psalm-pure
13103
     *
13104
     * @return string|string[]
13105
     *
13106
     * @template TToIso8859
13107
     * @phpstan-param TToIso8859 $str
13108
     * @phpstan-return TToIso8859
13109
     */
13110 8
    public static function to_iso8859($str)
    {
        // arrays are converted element by element (recursively), keys are kept
        if (\is_array($str)) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        $str = (string) $str;

        // nothing to decode for an empty string
        return $str === '' ? '' : self::utf8_decode($str);
    }
13127
13128
    /**
13129
     * alias for "UTF8::to_iso8859()"
13130
     *
13131
     * @param string|string[] $str
13132
     *
13133
     * @psalm-pure
13134
     *
13135
     * @return string|string[]
13136
     *
13137
     * @see        UTF8::to_iso8859()
13138
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
13139
     */
13140 2
    public static function to_latin1($str)
    {
        // deprecated alias: the work is done by to_iso8859()
        $converted = self::to_iso8859($str);

        return $converted;
    }
13144
13145
    /**
13146
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
13147
     *
13148
     * <ul>
13149
     * <li>It decodes UTF-8 codepoints and Unicode escape sequences.</li>
13150
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
13151
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
13152
     * case.</li>
13153
     * </ul>
13154
     *
13155
     * EXAMPLE: <code>UTF8::to_utf8(["\u0063\u0061\u0074"]); // array('cat')</code>
13156
     *
13157
     * @param string|string[] $str                        <p>Any string or array of strings.</p>
13158
     * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
13159
     *
13160
     * @psalm-pure
13161
     *
13162
     * @return string|string[]
13163
     *                         <p>The UTF-8 encoded string</p>
13164
     *
13165
     * @template TToUtf8
13166
     * @phpstan-param TToUtf8 $str
13167
     * @phpstan-return TToUtf8
13168
     *
13169
     * @noinspection SuspiciousBinaryOperationInspection
13170
     */
13171 44
    public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
    {
        if (!\is_array($str)) {
            /** @phpstan-var TToUtf8 $str */
            $str = self::to_utf8_string($str, $decode_html_entity_to_utf8);

            return $str;
        }

        // convert every element in place, keys are kept
        foreach ($str as $key => $value) {
            $str[$key] = self::to_utf8_string($value, $decode_html_entity_to_utf8);
        }

        /** @phpstan-var TToUtf8 $str */
        return $str;
    }
13187
13188
    /**
13189
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
13190
     *
13191
     * <ul>
13192
     * <li>It decodes UTF-8 codepoints and Unicode escape sequences.</li>
13193
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
13194
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
13195
     * case.</li>
13196
     * </ul>
13197
     *
13198
     * EXAMPLE: <code>UTF8::to_utf8_string("\u0063\u0061\u0074"); // 'cat'</code>
13199
     *
13200
     * @param string $str                        <p>Any string.</p>
13201
     * @param bool   $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
13202
     *
13203
     * @psalm-pure
13204
     *
13205
     * @return string
13206
     *                <p>The UTF-8 encoded string</p>
13207
     *
13208
     * @noinspection SuspiciousBinaryOperationInspection
13209
     */
13210 44
    public static function to_utf8_string(string $str, bool $decode_html_entity_to_utf8 = false): string
    {
        if ($str === '') {
            return $str;
        }

        $max = \strlen($str);
        $buf = '';

        //
        // pass 1: walk the bytes, keep what already looks like UTF-8, convert the rest
        //

        for ($i = 0; $i < $max; ++$i) {
            $c1 = $str[$i];

            // 7-bit byte: it doesn't need conversion
            if ($c1 < "\x80") {
                $buf .= $c1;

                continue;
            }

            // expected sequence length according to the lead byte (0 === not a lead byte)
            if ($c1 >= "\xC0" && $c1 <= "\xDF") {
                $seq_length = 2;
            } elseif ($c1 >= "\xE0" && $c1 <= "\xEF") {
                $seq_length = 3;
            } elseif ($c1 >= "\xF0" && $c1 <= "\xF7") {
                $seq_length = 4;
            } else {
                // stray continuation byte (0x80-0xBF) or a byte >= 0xF8
                $seq_length = 0;
            }

            // every following byte of the sequence must exist and be a continuation byte
            $is_sequence = $seq_length > 0 && $i + $seq_length <= $max;
            for ($j = 1; $is_sequence && $j < $seq_length; ++$j) {
                $cn = $str[$i + $j];
                $is_sequence = $cn >= "\x80" && $cn <= "\xBF";
            }

            if ($is_sequence) {
                // yeah, almost sure it's UTF8 already
                $buf .= \substr($str, $i, $seq_length);
                $i += $seq_length - 1;
            } else {
                // not valid UTF8 - convert this single byte
                $buf .= self::to_utf8_convert_helper($c1);
            }
        }

        //
        // pass 2: decode unicode escape sequences + unicode surrogate pairs
        //

        $buf = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // single "\uXXXX" escape
                    $cp = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair: http://unicode.org/faq/utf_bom.html#utf16-4
                    $cp = ((int) \hexdec($matches[1]) << 10)
                          + (int) \hexdec($matches[2])
                          + 0x10000
                          - (0xD800 << 10)
                          - 0xDC00;
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($cp < 0x80) {
                    return (string) self::chr($cp);
                }

                if ($cp < 0xA0) {
                    // U+0080 - U+009F: emit the two raw UTF-8 bytes with native chr().
                    // NOTE(review): the previous code passed these byte values to
                    // self::chr(), which appears to encode a code point as a character,
                    // so each byte would be encoded again - confirm against UTF8::chr().
                    return \chr(0xC0 | ($cp >> 6)) . \chr(0x80 | ($cp & 0x3F));
                }

                return self::decimal_to_chr($cp);
            },
            $buf
        );

        // preg error
        if ($buf === null) {
            return '';
        }

        // optionally decode html-entities
        if ($decode_html_entity_to_utf8) {
            $buf = self::html_entity_decode($buf);
        }

        return $buf;
    }
13321
13322
    /**
13323
     * Returns the given string as an integer, or null if the string isn't numeric.
13324
     *
13325
     * @param string $str
13326
     *
13327
     * @psalm-pure
13328
     *
13329
     * @return int|null
13330
     *                  <p>null if the string isn't numeric</p>
13331
     */
13332 1
    public static function to_int(string $str)
    {
        // only numeric strings are cast, everything else yields null
        return \is_numeric($str) ? (int) $str : null;
    }
13340
13341
    /**
13342
     * Returns the given input as string, or null if the input isn't int|float|string
13343
     * and do not implement the "__toString()" method.
13344
     *
13345
     * @param float|int|object|string|null $input
13346
     *
13347
     * @psalm-pure
13348
     *
13349
     * @return string|null
13350
     *                     <p>null if the input isn't int|float|string and has no "__toString()" method</p>
13351
     */
13352 1
    public static function to_string($input)
    {
        // strings and numbers have a natural string form
        if (\is_string($input) || \is_int($input) || \is_float($input)) {
            return (string) $input;
        }

        // objects are only converted when they can stringify themselves
        if (\is_object($input) && \method_exists($input, '__toString')) {
            return (string) $input;
        }

        // null, bool, array, resource or an object without "__toString()"
        return null;
    }
13386
13387
    /**
13388
     * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
13389
     *
13390
     * INFO: This is slower than "trim()"
13391
     *
13392
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
13393
     * but the check for ASCII (7-Bit) costs more time than we can save here.
13394
     *
13395
     * EXAMPLE: <code>UTF8::trim('   -ABC-中文空白-  '); // '-ABC-中文空白-'</code>
13396
     *
13397
     * @param string      $str   <p>The string to be trimmed</p>
13398
     * @param string|null $chars [optional] <p>Optional characters to be stripped</p>
13399
     *
13400
     * @psalm-pure
13401
     *
13402
     * @return string
13403
     *                <p>The trimmed string.</p>
13404
     */
13405 57
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty char list would build the character class "[]", which is not a
        // usable pattern. Like the native trim(), strip nothing in that case.
        if ($chars === '') {
            return $str;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                $pattern = "^[{$chars}]+|[{$chars}]+\$";
            } else {
                // default: strip whitespace
                $pattern = '^[\\s]+|[\\s]+$';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // fallback without mbstring: same pattern, handed to the class' regex_replace()
        if ($chars !== null) {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '');
    }
13433
13434
    /**
13435
     * Makes string's first char uppercase.
13436
     *
13437
     * EXAMPLE: <code>UTF8::ucfirst('ñtërnâtiônàlizætiøn foo'); // 'Ñtërnâtiônàlizætiøn foo'</code>
13438
     *
13439
     * @param string      $str                           <p>The input string.</p>
13440
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
13441
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
13442
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
13443
     *                                                   tr</p>
13444
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
13445
     *                                                   -> ß</p>
13446
     *
13447
     * @psalm-pure
13448
     *
13449
     * @return string
13450
     *                <p>The resulting string with its first char uppercased.</p>
13451
     */
13452 69
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove invalid byte sequences before splitting the string
            $str = self::clean($str);
        }

        // plain "mb_*" functions are enough when no special handling is requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        if ($encoding === 'UTF-8') {
            $rest = (string) \mb_substr($str, 1);
            $first_char = (string) \mb_substr($str, 0, 1);

            if ($use_mb_functions) {
                $first_char = \mb_strtoupper($first_char);
            } else {
                $first_char = self::strtoupper(
                    $first_char,
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            }

            return $first_char . $rest;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $rest = (string) self::substr($str, 1, null, $encoding);

        if ($use_mb_functions) {
            $first_char = \mb_strtoupper(
                (string) \mb_substr($str, 0, 1, $encoding),
                $encoding
            );
        } else {
            $first_char = self::strtoupper(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );
        }

        return $first_char . $rest;
    }
13510
13511
    /**
13512
     * alias for "UTF8::ucfirst()"
13513
     *
13514
     * @param string $str
13515
     * @param string $encoding
13516
     * @param bool   $clean_utf8
13517
     *
13518
     * @psalm-pure
13519
     *
13520
     * @return string
13521
     *
13522
     * @see        UTF8::ucfirst()
13523
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
13524
     */
13525 1
    public static function ucword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // deprecated alias: the work is done by ucfirst()
        $result = self::ucfirst($str, $encoding, $clean_utf8);

        return $result;
    }
13532
13533
    /**
13534
     * Uppercase for all words in the string.
13535
     *
13536
     * EXAMPLE: <code>UTF8::ucwords('iñt ërn âTi ônà liz æti øn'); // 'Iñt Ërn ÂTi Ônà Liz Æti Øn'</code>
13537
     *
13538
     * @param string   $str        <p>The input string.</p>
13539
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
13540
     * @param string   $char_list  [optional] <p>Additional chars that contains to words and do not start a new
13541
     *                             word.</p>
13542
     * @param string   $encoding   [optional] <p>Set the charset.</p>
13543
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
13544
     *
13545
     * @psalm-pure
13546
     *
13547
     * @return string
13548
     */
13549 8
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // strict comparison: the string "0" is valid input and must not be dropped
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($clean_utf8) {
            // remove invalid byte sequences before splitting into words
            $str = self::clean($str);
        }

        // The native function is only an option when neither extra word chars nor
        // exceptions are given ("0" is a real char list, hence no truthiness check).
        $use_php_default_functions = ($char_list . \implode('', $exceptions)) === '';

        if (
            $use_php_default_functions
            &&
            ASCII::is_ascii($str)
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        foreach ($words as $word) {
            // skip empty chunks only; a chunk such as "0" is kept
            if ($word === '') {
                continue;
            }

            if (
                !$use_exceptions
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words_str .= self::ucfirst($word, $encoding);
            } else {
                // excluded words are appended unchanged
                $words_str .= $word;
            }
        }

        return $words_str;
    }
13601
13602
    /**
     * URL-decode a string, decoding HTML entities and repairing simple UTF-8 damage on the way.
     *
     * EXAMPLE: <code>UTF8::urldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * Processing order: "urldecode_unicode_helper()" runs once, then one or more decode passes
     * ("to_utf8()" -> "html_entity_decode()" -> native "urldecode()"), and finally
     * "fix_simple_utf8()" is applied to the result.
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>
     *                             If true, the decode pass is repeated until a pass no longer changes
     *                             the string; if false, exactly one pass is executed.
     *                             </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The decoded string, or an empty string for empty input.</p>
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $decoded = self::urldecode_unicode_helper($str);

        // The first pass always runs; the loop only continues in multi-decode mode
        // and stops as soon as a pass leaves the string unchanged.
        do {
            $previous = $decoded;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $decoded = \urldecode(
                self::html_entity_decode(
                    self::to_utf8($previous),
                    \ENT_QUOTES | \ENT_HTML5
                )
            );
        } while ($multi_decode && $previous !== $decoded);

        return self::fix_simple_utf8($decoded);
    }
13661
13662
    /**
     * Returns the lookup table "url-encoded Windows-1252 byte" -> "UTF-8 character".
     *
     * The table covers the bytes 0x20 - 0xFF. Keys are upper-case percent escapes ('%20' ... '%FF').
     * Every byte that Windows-1252 defines maps to its UTF-8 character; the five bytes that the
     * code page leaves unassigned (0x81, 0x8D, 0x8F, 0x90, 0x9D) map to an empty string.
     *
     * Notes on entries that are easy to get wrong:
     * - '%80' is the euro sign (it previously mapped to a backtick, which is already '%60').
     * - '%7F' (DEL), '%A0' (no-break space) and '%AD' (soft hyphen) are written as byte escapes
     *   so that the invisible characters cannot get lost when the file is edited or copied.
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>Map with percent escapes as keys and UTF-8 strings as values.</p>
     *
     * @deprecated <p>please use the "UTF8::urldecode()" function to decode a string</p>
     */
    public static function urldecode_fix_win1252_chars(): array
    {
        return [
            '%20' => ' ',
            '%21' => '!',
            '%22' => '"',
            '%23' => '#',
            '%24' => '$',
            '%25' => '%',
            '%26' => '&',
            '%27' => "'",
            '%28' => '(',
            '%29' => ')',
            '%2A' => '*',
            '%2B' => '+',
            '%2C' => ',',
            '%2D' => '-',
            '%2E' => '.',
            '%2F' => '/',
            '%30' => '0',
            '%31' => '1',
            '%32' => '2',
            '%33' => '3',
            '%34' => '4',
            '%35' => '5',
            '%36' => '6',
            '%37' => '7',
            '%38' => '8',
            '%39' => '9',
            '%3A' => ':',
            '%3B' => ';',
            '%3C' => '<',
            '%3D' => '=',
            '%3E' => '>',
            '%3F' => '?',
            '%40' => '@',
            '%41' => 'A',
            '%42' => 'B',
            '%43' => 'C',
            '%44' => 'D',
            '%45' => 'E',
            '%46' => 'F',
            '%47' => 'G',
            '%48' => 'H',
            '%49' => 'I',
            '%4A' => 'J',
            '%4B' => 'K',
            '%4C' => 'L',
            '%4D' => 'M',
            '%4E' => 'N',
            '%4F' => 'O',
            '%50' => 'P',
            '%51' => 'Q',
            '%52' => 'R',
            '%53' => 'S',
            '%54' => 'T',
            '%55' => 'U',
            '%56' => 'V',
            '%57' => 'W',
            '%58' => 'X',
            '%59' => 'Y',
            '%5A' => 'Z',
            '%5B' => '[',
            '%5C' => '\\',
            '%5D' => ']',
            '%5E' => '^',
            '%5F' => '_',
            '%60' => '`',
            '%61' => 'a',
            '%62' => 'b',
            '%63' => 'c',
            '%64' => 'd',
            '%65' => 'e',
            '%66' => 'f',
            '%67' => 'g',
            '%68' => 'h',
            '%69' => 'i',
            '%6A' => 'j',
            '%6B' => 'k',
            '%6C' => 'l',
            '%6D' => 'm',
            '%6E' => 'n',
            '%6F' => 'o',
            '%70' => 'p',
            '%71' => 'q',
            '%72' => 'r',
            '%73' => 's',
            '%74' => 't',
            '%75' => 'u',
            '%76' => 'v',
            '%77' => 'w',
            '%78' => 'x',
            '%79' => 'y',
            '%7A' => 'z',
            '%7B' => '{',
            '%7C' => '|',
            '%7D' => '}',
            '%7E' => '~',
            '%7F' => "\x7F", // DEL
            '%80' => '€',
            '%81' => '', // unassigned in Windows-1252
            '%82' => '‚',
            '%83' => 'ƒ',
            '%84' => '„',
            '%85' => '…',
            '%86' => '†',
            '%87' => '‡',
            '%88' => 'ˆ',
            '%89' => '‰',
            '%8A' => 'Š',
            '%8B' => '‹',
            '%8C' => 'Œ',
            '%8D' => '', // unassigned in Windows-1252
            '%8E' => 'Ž',
            '%8F' => '', // unassigned in Windows-1252
            '%90' => '', // unassigned in Windows-1252
            '%91' => '‘',
            '%92' => '’',
            '%93' => '“',
            '%94' => '”',
            '%95' => '•',
            '%96' => '–',
            '%97' => '—',
            '%98' => '˜',
            '%99' => '™',
            '%9A' => 'š',
            '%9B' => '›',
            '%9C' => 'œ',
            '%9D' => '', // unassigned in Windows-1252
            '%9E' => 'ž',
            '%9F' => 'Ÿ',
            '%A0' => "\xC2\xA0", // NO-BREAK SPACE (U+00A0)
            '%A1' => '¡',
            '%A2' => '¢',
            '%A3' => '£',
            '%A4' => '¤',
            '%A5' => '¥',
            '%A6' => '¦',
            '%A7' => '§',
            '%A8' => '¨',
            '%A9' => '©',
            '%AA' => 'ª',
            '%AB' => '«',
            '%AC' => '¬',
            '%AD' => "\xC2\xAD", // SOFT HYPHEN (U+00AD)
            '%AE' => '®',
            '%AF' => '¯',
            '%B0' => '°',
            '%B1' => '±',
            '%B2' => '²',
            '%B3' => '³',
            '%B4' => '´',
            '%B5' => 'µ',
            '%B6' => '¶',
            '%B7' => '·',
            '%B8' => '¸',
            '%B9' => '¹',
            '%BA' => 'º',
            '%BB' => '»',
            '%BC' => '¼',
            '%BD' => '½',
            '%BE' => '¾',
            '%BF' => '¿',
            '%C0' => 'À',
            '%C1' => 'Á',
            '%C2' => 'Â',
            '%C3' => 'Ã',
            '%C4' => 'Ä',
            '%C5' => 'Å',
            '%C6' => 'Æ',
            '%C7' => 'Ç',
            '%C8' => 'È',
            '%C9' => 'É',
            '%CA' => 'Ê',
            '%CB' => 'Ë',
            '%CC' => 'Ì',
            '%CD' => 'Í',
            '%CE' => 'Î',
            '%CF' => 'Ï',
            '%D0' => 'Ð',
            '%D1' => 'Ñ',
            '%D2' => 'Ò',
            '%D3' => 'Ó',
            '%D4' => 'Ô',
            '%D5' => 'Õ',
            '%D6' => 'Ö',
            '%D7' => '×',
            '%D8' => 'Ø',
            '%D9' => 'Ù',
            '%DA' => 'Ú',
            '%DB' => 'Û',
            '%DC' => 'Ü',
            '%DD' => 'Ý',
            '%DE' => 'Þ',
            '%DF' => 'ß',
            '%E0' => 'à',
            '%E1' => 'á',
            '%E2' => 'â',
            '%E3' => 'ã',
            '%E4' => 'ä',
            '%E5' => 'å',
            '%E6' => 'æ',
            '%E7' => 'ç',
            '%E8' => 'è',
            '%E9' => 'é',
            '%EA' => 'ê',
            '%EB' => 'ë',
            '%EC' => 'ì',
            '%ED' => 'í',
            '%EE' => 'î',
            '%EF' => 'ï',
            '%F0' => 'ð',
            '%F1' => 'ñ',
            '%F2' => 'ò',
            '%F3' => 'ó',
            '%F4' => 'ô',
            '%F5' => 'õ',
            '%F6' => 'ö',
            '%F7' => '÷',
            '%F8' => 'ø',
            '%F9' => 'ù',
            '%FA' => 'ú',
            '%FB' => 'û',
            '%FC' => 'ü',
            '%FD' => 'ý',
            '%FE' => 'þ',
            '%FF' => 'ÿ',
        ];
    }
13900
13901
    /**
     * Decodes a UTF-8 string to ISO-8859-1.
     *
     * EXAMPLE: <code>UTF8::encode('UTF-8', UTF8::utf8_decode('-ABC-中文空白-')); // '-ABC-????-'</code>
     *
     * The string is rewritten in place, byte by byte: two-byte sequences whose code point is
     * below 256 become the matching single byte, every other multi-byte sequence becomes "?".
     * A two-byte lead byte at the very end of the input (truncated sequence) also becomes "?"
     * instead of reading past the end of the string.
     *
     * @param string $str             <p>The input string.</p>
     * @param bool   $keep_utf8_chars <p>
     *                                If true, the untouched input is returned whenever the decoded
     *                                result is not shorter than the input (both measured with
     *                                "UTF8::strlen()").
     *                                </p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection SuspiciousBinaryOperationInspection
     */
    public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // save for later comparison
        $str_backup = $str;
        $len = \strlen($str);

        // lazy-load the lookup tables from the "ord" / "chr" data files
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $no_char_found = '?';
        // $i is the read position, $j the write position ($j never overtakes $i)
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            // the high nibble of the current byte selects the sequence length
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    if ($i + 1 >= $len) {
                        // truncated two-byte sequence: there is no continuation byte to read
                        $str[$j] = $no_char_found;

                        break;
                    }

                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $no_char_found;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // four-byte sequence: skip one byte more than the three-byte case below
                    ++$i;

                // no break

                case "\xE0":
                    $str[$j] = $no_char_found;
                    $i += 2;

                    break;

                default:
                    $str[$j] = $str[$i];
            }
        }

        /** @var false|string $return - needed for PhpStan (stubs error) */
        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keep_utf8_chars
            &&
            (int) self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
13977
13978
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * EXAMPLE: <code>UTF8::utf8_decode(UTF8::utf8_encode('-ABC-中文空白-')); // '-ABC-中文空白-'</code>
     *
     * This is a thin wrapper around the global "utf8_encode()" function that normalizes a
     * "false" result (which a polyfill may return) to an empty string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The UTF-8 encoded string, or an empty string on empty input / failure.</p>
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /** @var false|string $encoded - the polyfill maybe return false */
        $encoded = \utf8_encode($str);

        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        return $encoded === false ? '' : $encoded;
    }
14006
14007
    /**
     * Backward-compatibility alias: forwards the string unchanged to "UTF8::fix_simple_utf8()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Whatever "UTF8::fix_simple_utf8()" returns for the given string.</p>
     *
     * @deprecated <p>please use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
14022
14023
    /**
     * Returns the class-level table of UTF-8 whitespace characters.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>
     *                  The content of the static "$WHITESPACE_TABLE" property: whitespace characters as
     *                  values, keyed by the type of whitespace as defined in the URL above.
     *                  </p>
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
14038
14039
    /**
     * Limit the number of words in a string.
     *
     * EXAMPLE: <code>UTF8::words_limit('fòô bàř fòô', 2, ''); // 'fòô bàř'</code>
     *
     * Words are runs of non-whitespace characters as seen by the Unicode-aware "\s" of the
     * regular expression. If the string has to be cut, trailing whitespace is removed from the
     * kept part before "$str_add_on" is appended.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $limit      <p>The limit of words as integer.</p>
     * @param string $str_add_on <p>Replacement for the stripped part of the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>
     *                An empty string for empty input or a limit below 1, the unchanged input if it
     *                does not exceed the limit (or cannot be matched, e.g. invalid UTF-8),
     *                otherwise the shortened string followed by "$str_add_on".
     *                </p>
     */
    public static function words_limit(
        string $str,
        int $limit = 100,
        string $str_add_on = '…'
    ): string {
        if ($str === '' || $limit < 1) {
            return '';
        }

        \preg_match('/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u', $str, $matches);

        // The pattern is anchored at the start, so the match is always a prefix of $str:
        // if it equals $str, nothing was cut off and the input can be returned as it is.
        if (
            !isset($matches[0])
            ||
            $matches[0] === $str
        ) {
            return $str;
        }

        // Trim with the same Unicode-aware whitespace definition that separated the words;
        // a byte-wise rtrim() would leave e.g. a trailing EM SPACE in front of the add-on.
        // NUL and vertical tab are listed explicitly to keep everything rtrim() used to strip.
        $kept = \preg_replace('/[\\s\\x00\\x0B]++$/u', '', $matches[0]);
        if ($kept === null) {
            // regex engine error: fall back to the byte-wise trim
            $kept = \rtrim($matches[0]);
        }

        return $kept . $str_add_on;
    }
14073
14074
    /**
     * Wraps a string to a given number of characters
     *
     * EXAMPLE: <code>UTF8::wordwrap('Iñtërnâtiônàlizætiøn', 2, '<br>', true)); // 'Iñ<br>të<br>rn<br>ât<br>iô<br>nà<br>li<br>zæ<br>ti<br>øn'</code>
     *
     * How it works: the input is turned into a single-byte "mask" with one placeholder per
     * character ("?" for a character, " " for a space, "#" for an existing break). PHP's native
     * "wordwrap()" wraps that mask using "#" as break marker, and the result is then mapped back
     * onto the real (multi-byte) characters.
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>
     *                The given string wrapped at the specified column; an empty string if "$str"
     *                or "$break" is empty.
     *                </p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($str === '' || $break === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }

        // Build the character list and, in parallel, the placeholder mask.
        /** @var string[] $chars */
        $chars = [];
        $mask = '';
        foreach ($segments as $segment_index => $segment) {
            if ($segment_index) {
                // an already existing break is kept as one list entry
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $wrapped = '';
        $char_index = 0;
        $break_pos = -1;
        $mask_index = -1;
        $mask = \wordwrap($mask, $width, '#', $cut);

        $mask_length = \mb_strlen($mask);
        /** @noinspection PhpAssignmentInConditionInspection - is ok here */
        while (($break_pos = \mb_strpos($mask, '#', $break_pos + 1)) !== false) {
            // copy everything in front of the current break marker
            for (++$mask_index; $mask_index < $break_pos; ++$mask_index) {
                if (isset($chars[$char_index])) {
                    $wrapped .= $chars[$char_index];
                    unset($chars[$char_index]);
                }
                ++$char_index;

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($mask_index > $mask_length) {
                    break 2;
                }
            }

            // the marker replaces an existing break or a space: drop that character
            if (
                $chars[$char_index] === $break
                ||
                $chars[$char_index] === ' '
            ) {
                unset($chars[$char_index]);
                ++$char_index;
            }

            $wrapped .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($break_pos > $mask_length) {
                break;
            }
        }

        // whatever was not consumed belongs to the last line
        return $wrapped . \implode('', $chars);
    }
14169
14170
    /**
     * Line-Wrap the string after $limit, but split the string by "$delimiter" before ...
     *    ... so that we wrap the per line.
     *
     * Each part is wrapped on its own via "UTF8::wordwrap()"; the parts are then joined again
     * with the delimiter (a "\n" if no delimiter was given).
     *
     * @param string      $str             <p>The input string.</p>
     * @param int         $width           [optional] <p>The column width.</p>
     * @param string      $break           [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool        $cut             [optional] <p>
     *                                     If the cut is set to true, the string is
     *                                     always wrapped at or before the specified width. So if you have
     *                                     a word that is larger than the given width, it is broken apart.
     *                                     </p>
     * @param bool        $add_final_break [optional] <p>
     *                                     If this flag is true, then the method will add a $break at the end
     *                                     of the result string.
     *                                     </p>
     * @param string|null $delimiter       [optional] <p>
     *                                     You can change the default behavior, where we split the string by newline
     *                                     ("\r\n", "\r" or "\n").
     *                                     </p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $add_final_break = true,
        string $delimiter = null
    ): string {
        $lines = $delimiter === null
            ? \preg_split('/\\r\\n|\\r|\\n/', $str)
            : \explode($delimiter, $str);

        $wrapped_lines = [];
        if ($lines !== false) {
            foreach ($lines as $line) {
                $wrapped_lines[] = self::wordwrap($line, $width, $break, $cut);
            }
        }

        $glue = $delimiter ?? "\n";
        $tail = $add_final_break ? $break : '';

        return \implode($glue, $wrapped_lines) . $tail;
    }
14223
14224
    /**
     * Returns the class-level list of Unicode White Space characters.
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>
     *                  The content of the static "$WHITESPACE" property: an array with numeric code point
     *                  as key and White Space Character as value.
     *                  </p>
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
14236
14237
    /**
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8_string('Iñtërnâtiônàlizætiøn'); // true
     * //
     * UTF8::is_utf8_string("Iñtërnâtiônàlizætiøn\xA0\xA1"); // false
     * </code>
     *
     * If PCRE has UTF-8 support, the check is delegated to a "/u" regular expression. Otherwise
     * the string is validated octet by octet with a small state machine that rejects overlong
     * forms, sequences longer than four octets, surrogates and code points above U+10FFFF.
     *
     * @see          http://hsivonen.iki.fi/php-utf8/
     *
     * @param string $str    <p>The string to be checked.</p>
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function is_utf8_string(string $str, bool $strict = false)
    {
        if ($str === '') {
            return true;
        }

        // strict mode: binary-looking data that is detected as UTF-16 / UTF-32 is not UTF-8
        if ($strict && self::is_binary($str, true)) {
            if (self::is_utf16($str, false) !== false) {
                return false;
            }

            if (self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        if (self::$SUPPORT['pcre_utf8']) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^./us', $str) === 1;
        }

        $pending = 0; // number of continuation octets still expected for the current sequence
        $code_point = 0; // code point assembled so far
        $sequence_length = 1; // total number of octets of the current sequence

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $len = \strlen($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $len; ++$i) {
            $octet = self::$ORD[$str[$i]];

            if ($pending === 0) {
                // Start of a character: either US-ASCII or the first octet of a
                // multi-octet sequence.
                if ((0x80 & $octet) === 0) {
                    // US-ASCII, pass straight through.
                    $sequence_length = 1;
                } elseif ((0xE0 & $octet) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $code_point = ($octet & 0x1F) << 6;
                    $pending = 1;
                    $sequence_length = 2;
                } elseif ((0xF0 & $octet) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $code_point = ($octet & 0x0F) << 12;
                    $pending = 2;
                    $sequence_length = 3;
                } elseif ((0xF8 & $octet) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $code_point = ($octet & 0x07) << 18;
                    $pending = 3;
                    $sequence_length = 4;
                } elseif ((0xFC & $octet) === 0xF8) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $code_point = ($octet & 0x03) << 24;
                    $pending = 4;
                    $sequence_length = 5;
                } elseif ((0xFE & $octet) === 0xFC) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $code_point = ($octet & 1) << 30;
                    $pending = 5;
                    $sequence_length = 6;
                } else {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }

                continue;
            }

            // Inside a multi-octet sequence only a continuation octet (10xxxxxx) is legal.
            if ((0xC0 & $octet) !== 0x80) {
                // Incomplete multi-octet sequence.
                return false;
            }

            // Legal continuation: merge its six payload bits into the code point.
            $code_point |= ($octet & 0x0000003F) << (($pending - 1) * 6);

            if (--$pending !== 0) {
                // more continuation octets to come
                continue;
            }

            // End of the multi-octet sequence: $code_point now holds the final
            // Unicode code point, check for illegal sequences and code points.
            if (
                // From Unicode 3.1, non-shortest form is illegal
                ($sequence_length === 2 && $code_point < 0x0080)
                ||
                ($sequence_length === 3 && $code_point < 0x0800)
                ||
                ($sequence_length === 4 && $code_point < 0x10000)
                ||
                ($sequence_length > 4)
                ||
                // From Unicode 3.2, surrogate characters are illegal.
                (($code_point & 0xFFFFF800) === 0xD800)
                ||
                // Code points outside the Unicode range are illegal.
                ($code_point > 0x10FFFF)
            ) {
                return false;
            }

            // reset the state for the next character
            $pending = 0;
            $code_point = 0;
            $sequence_length = 1;
        }

        // a sequence that is still open at the end of the string is invalid
        return $pending === 0;
    }
14392
14393
    /**
     * Applies the case-folding replacement tables to a string.
     *
     * First the "upper" / "lower" lists of "$COMMON_CASE_FOLD" are applied, then - if requested -
     * the lists from the "caseFolding_full" data file (loaded once and kept in a static variable).
     *
     * @param string $str
     * @param bool   $use_lowercase      <p>Use uppercase by default, otherwise use lowercase.</p>
     * @param bool   $use_full_case_fold <p>Convert not only common cases.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function fixStrCaseHelper(
        string $str,
        bool $use_lowercase = false,
        bool $use_full_case_fold = false
    ) {
        $upper = self::$COMMON_CASE_FOLD['upper'];
        $lower = self::$COMMON_CASE_FOLD['lower'];

        // common case folding: the direction depends on the requested target case
        $str = $use_lowercase
            ? \str_replace($upper, $lower, $str)
            : \str_replace($lower, $upper, $str);

        if (!$use_full_case_fold) {
            return $str;
        }

        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        // full case folding: index 0 -> index 1 for lowercase, index 1 -> index 0 otherwise
        if ($use_lowercase) {
            return \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str);
        }

        return \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
    }
14446
14447
    /**
     * get data from "/data/*.php"
     *
     * Includes "<directory of this file>/data/<$file>.php" and hands back whatever that file returns.
     *
     * @param string $file <p>File name inside the "data" directory, without the ".php" extension.</p>
     *
     * @psalm-pure
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function getData(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
14465
14466
    /**
     * Builds the emoji lookup caches on first use.
     *
     * Loads the "emoji" data file if needed, sorts it by key length (longest key first) and
     * fills "$EMOJI_KEYS_CACHE", "$EMOJI_VALUES_CACHE" and "$EMOJI_KEYS_REVERSIBLE_CACHE".
     *
     * @psalm-pure
     *
     * @return true|null
     *                   <p>true if the caches were built by this call, null if they already existed.</p>
     */
    private static function initEmojiData()
    {
        if (self::$EMOJI_KEYS_CACHE !== null) {
            // already initialized
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        /**
         * @psalm-suppress ImpureFunctionCall - static sort function is used
         */
        \uksort(
            self::$EMOJI,
            static function (string $left, string $right): int {
                // descending by byte length
                return \strlen($right) <=> \strlen($left);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = self::$EMOJI;

        // one placeholder token per key, derived from the key's CRC32 checksum
        foreach (self::$EMOJI_KEYS_CACHE as $emoji_key) {
            $checksum = \crc32($emoji_key);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev((string) $checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
14501
14502
    /**
     * Tell whether the mbstring function overloading of the string functions is switched on.
     *
     * @psalm-pure
     *
     * @return bool <p><strong>true</strong> when the MB_OVERLOAD_STRING bit is set in the
     *              "mbstring.func_overload" INI value, <strong>false</strong> otherwise
     *              (including when the constant does not exist).</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function mbstring_overloaded()
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */

        // without the constant there is nothing that could be overloaded
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $func_overload = (int) @\ini_get('mbstring.func_overload');

        return ($func_overload & \MB_OVERLOAD_STRING) !== 0;
    }
14523
14524
    /**
     * Filter a list of strings and return the survivors as a re-indexed list.
     *
     * @param array    $strings             <p>The strings to filter.</p>
     * @param bool     $remove_empty_values <p>Drop entries that are empty after trim().</p>
     * @param int|null $remove_short_values <p>When not null, drop entries whose mb_strlen()
     *                                      is less than or equal to this value.</p>
     *
     * @psalm-pure
     *
     * @return array <p>The remaining strings, in their original order, with fresh numeric keys.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function reduce_string_array(
        array $strings,
        bool $remove_empty_values,
        int $remove_short_values = null
    ) {
        $filter_by_length = $remove_short_values !== null;
        $kept = [];

        foreach ($strings as $candidate) {
            // the length check comes first, the blank check is only evaluated when it did not match
            $skip = ($filter_by_length && \mb_strlen($candidate) <= $remove_short_values)
                    ||
                    ($remove_empty_values && \trim($candidate) === '');

            if (!$skip) {
                $kept[] = $candidate;
            }
        }

        return $kept;
    }
14565
14566
    /**
     * Build a regex fragment that matches any one of the characters in $s.
     *
     * The characters are collected into a bracket expression ("[...]"); anything that
     * cannot be placed inside the bracket expression is added as an alternative, in which
     * case the result is a non-capturing group ("(?:[...]|...)"). Results are memoized
     * per ($s, $class) pair.
     *
     * @param string $s     <p>The characters to match.</p>
     * @param string $class <p>Extra content for the bracket expression; it is inserted
     *                      verbatim (it is not quoted here).</p>
     *
     * @psalm-pure
     *
     * @return string <p>The regex fragment, without delimiters.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function rxClass(string $s, string $class = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $RX_CLASS_CACHE = [];

        // Prefix the key with the byte length of $s, so that two different ($s, $class)
        // pairs can never share a key (e.g. "a_" + "b" versus "a" + "_b").
        $cache_key = \strlen($s) . '_' . $s . '_' . $class;

        if (isset($RX_CLASS_CACHE[$cache_key])) {
            return $RX_CLASS_CACHE[$cache_key];
        }

        // index 0 is the body of the bracket expression, all further entries are alternatives
        $class_array = [$class];

        // iterate by value with a dedicated variable: the parameter $s stays untouched
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a hyphen at the very start of a bracket expression is taken literally
                $class_array[0] = '-' . $class_array[0];
            } elseif (!isset($char[2])) {
                // shorter than three bytes: escape regex meta characters
                $class_array[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                // three or more bytes, but still one character according to self::strlen()
                $class_array[0] .= $char;
            } else {
                $class_array[] = $char;
            }
        }

        // Strict comparison: a body of "0" is a valid, non-empty class and gets its brackets too.
        if ($class_array[0] !== '') {
            $class_array[0] = '[' . $class_array[0] . ']';
        }

        if (\count($class_array) === 1) {
            $return = $class_array[0];
        } else {
            $return = '(?:' . \implode('|', $class_array) . ')';
        }

        $RX_CLASS_CACHE[$cache_key] = $return;

        return $return;
    }
14623
14624
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * Splits $names at $delimiter and upper-cases the first character of every part via
     * self::ucfirst(), except for parts that
     * - are exactly one of the listed name particles ("van", "de", "von", ...),
     * - start with one of the listed prefixes ("mc", "mac", "d'", ...), or
     * - start with one of the listed name particles while the delimiter is "-".
     * The parts are joined again with the same delimiter.
     *
     * @param string $names     <p>The name(s) to process.</p>
     * @param string $delimiter <p>The separator between the name parts.</p>
     * @param string $encoding  <p>Passed through to self::ucfirst().</p>
     *
     * @psalm-pure
     *
     * @return string <p>The processed names, or an empty string when $delimiter is empty.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function str_capitalize_name_helper(
        string $names,
        string $delimiter,
        string $encoding = 'UTF-8'
    ) {
        // explode() cannot split on an empty delimiter: it returns false (with a warning)
        // on PHP 7 and throws a ValueError on PHP 8, so answer that case up front.
        if ($delimiter === '') {
            return '';
        }

        $name_helper_array = \explode($delimiter, $names);

        $special_cases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        foreach ($name_helper_array as &$name) {
            // a part that is exactly a name particle stays as it is
            if (\in_array($name, $special_cases['names'], true)) {
                continue;
            }

            $continue = false;

            // for hyphenated names, a part that merely starts with a particle is kept as well
            if ($delimiter === '-') {
                foreach ($special_cases['names'] as $beginning) {
                    if (\strncmp($name, $beginning, \strlen($beginning)) === 0) {
                        $continue = true;

                        break;
                    }
                }
            }

            foreach ($special_cases['prefixes'] as $beginning) {
                if (\strncmp($name, $beginning, \strlen($beginning)) === 0) {
                    $continue = true;

                    break;
                }
            }

            if ($continue) {
                continue;
            }

            $name = self::ucfirst($name, $encoding);
        }
        // drop the reference so the last array element cannot be modified by accident
        unset($name);

        return \implode($delimiter, $name_helper_array);
    }
14724
14725
    /**
     * Fold a string for collation matching: decompose it (Unicode NFD) and strip all
     * non-spacing marks, so that accented characters are reduced to their base characters.
     *
     * @param string $str <p>The input string</p>
     *
     * @psalm-pure
     *
     * @return string|null <p>The folded string; an empty string when the normalization fails;
     *                     <strong>null</strong> when preg_replace() reports an error.</p>
     */
    private static function strtonatfold(string $str)
    {
        /** @noinspection PhpUndefinedClassInspection */
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        if ($decomposed === false) {
            return '';
        }

        // \p{Mn} = "Mark, nonspacing", i.e. the combining accents left over by the decomposition
        return \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
14748
14749
    /**
     * Convert one input byte into its UTF-8 representation.
     *
     * The byte is looked up in the Windows-1252 special-case table first; when there is no
     * entry, a two-byte UTF-8 sequence is assembled from the byte value.
     *
     * @param int|string $input <p>The byte to convert; it must be a key of the "ord" data table.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     * @noinspection SuspiciousBinaryOperationInspection
     */
    private static function to_utf8_convert_helper($input)
    {
        // init
        $buf = '';

        // the lookup tables are loaded on first use only
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // Truncate explicitly: a float offset with a fractional part is converted
            // implicitly otherwise, which is deprecated since PHP 8.1.
            $high_bits = (int) ($ordC1 / 64);

            // The bitwise operators below work on strings on purpose (byte-wise AND / OR),
            // they are not meant to be the logical "&&" / "||".
            // NOTE(review): presumably self::$CHR maps a code to its one-byte string,
            // which makes $cc1 the lead byte and $cc2 the continuation byte - confirm.
            /** @noinspection OffsetOperationsInspection */
            $cc1 = self::$CHR[$high_bits] | "\xC0";
            $cc2 = ((string) $input & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
14788
14789
    /**
     * Rewrite non-standard "%uXXXX" escapes (three or four hex digits) into the
     * HTML hexadecimal entity form "&#xXXXX;". Everything else is left as it is.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string <p>The string with every "%uXXXX" escape replaced.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function urldecode_unicode_helper(string $str)
    {
        // cheap pre-check, so the regex engine is only used when it can match at all
        if (\strpos($str, '%u') === false) {
            return $str;
        }

        $unicode_escape = '/%u([0-9a-fA-F]{3,4})/';

        if (!\preg_match($unicode_escape, $str)) {
            return $str;
        }

        return (string) \preg_replace($unicode_escape, '&#x\\1;', $str);
    }
14811
}
14812