Passed
Push — master ( 0828e1...eb7992 )
by Lars
09:51 queued 12s
created

UTF8   F

Complexity

Total Complexity 1741

Size/Duplication

Total Lines 14566
Duplicated Lines 0 %

Test Coverage

Coverage 80.75%

Importance

Changes 105
Bugs 53 Features 6
Metric Value
eloc 4452
c 105
b 53
f 6
dl 0
loc 14566
ccs 3012
cts 3730
cp 0.8075
rs 0.8
wmc 1741

309 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 2 1
B chr_to_decimal() 0 38 8
A add_bom_to_string() 0 7 2
A array_change_key_case() 0 23 5
A count_chars() 0 11 1
D chr() 0 108 18
A chr_to_int() 0 3 1
A chunk_split() 0 3 1
A clean() 0 48 6
B between() 0 48 8
A codepoints() 0 36 5
A chr_map() 0 5 1
A cleanup() 0 24 2
A char_at() 0 7 2
A chars() 0 4 1
A checkForSupport() 0 47 4
A chr_to_hex() 0 11 3
A collapse_whitespace() 0 8 2
A access() 0 11 4
A callback() 0 3 1
A binary_to_str() 0 12 3
A bom() 0 3 1
A chr_size_list() 0 17 3
A file_has_bom() 0 8 2
A filter_input() 0 16 3
A get_unique_string() 0 22 3
A is_bom() 0 10 3
A is_hexadecimal() 0 8 2
A encode_mimeheader() 0 26 5
A ctype_loaded() 0 3 1
A has_uppercase() 0 8 2
A isBinary() 0 3 1
A emoji_decode() 0 18 2
A is_utf8() 0 13 4
A lcword() 0 13 1
A html_escape() 0 6 1
B get_file_type() 0 65 7
C is_utf16() 0 71 16
A isHtml() 0 3 1
C filter() 0 57 12
A isBase64() 0 3 1
A is_html() 0 14 2
A decode_mimeheader() 0 8 3
A html_decode() 0 6 1
A isUtf32() 0 3 1
A emoji_encode() 0 18 2
A is_alpha() 0 8 2
B get_random_string() 0 56 10
A first_char() 0 14 4
A css_identifier() 0 56 6
A isUtf8() 0 3 1
A css_stripe_media_queries() 0 6 1
A is_serialized() 0 11 3
A is_uppercase() 0 8 2
A is_ascii() 0 3 1
A is_blank() 0 8 2
D getCharDirection() 0 105 118
A htmlspecialchars() 0 15 3
A filter_var_array() 0 15 2
A decimal_to_chr() 0 3 1
A has_whitespace() 0 8 2
A lowerCaseFirst() 0 13 1
B is_binary() 0 38 9
A intlChar_loaded() 0 3 1
A lcfirst() 0 44 5
B is_url() 0 44 7
A finfo_loaded() 0 3 1
A fits_inside() 0 3 1
A is_binary_file() 0 16 4
A intl_loaded() 0 3 1
A mbstring_overloaded() 0 11 2
A html_stripe_empty_tags() 0 6 1
F extract_text() 0 175 34
A json_loaded() 0 3 1
A isBom() 0 3 1
A int_to_chr() 0 3 1
A is_lowercase() 0 8 2
A hasBom() 0 3 1
A iconv_loaded() 0 3 1
A lcwords() 0 34 6
A isAscii() 0 3 1
A filter_var() 0 15 2
A is_empty() 0 3 1
A isUtf16() 0 3 1
F encode() 0 144 37
C is_utf32() 0 71 16
A is_alphanumeric() 0 8 2
A json_decode() 0 14 2
A fix_simple_utf8() 0 32 4
A is_printable() 0 3 1
B is_json() 0 27 8
A int_to_hex() 0 7 2
A has_lowercase() 0 8 2
A json_encode() 0 10 2
A is_base64() 0 17 5
A hex_to_int() 0 14 3
A htmlentities() 0 28 3
A hex_to_chr() 0 4 1
A isJson() 0 3 1
A filter_input_array() 0 15 3
A getSupportInfo() 0 13 3
A is_punctuation() 0 3 1
C html_entity_decode() 0 58 13
B file_get_contents() 0 56 11
A emoji_from_country_code() 0 17 3
A str_substr_after_first_separator() 0 28 6
A str_begins() 0 3 1
A max() 0 14 3
B str_camelize() 0 74 10
A parse_str() 0 18 4
A str_contains() 0 10 2
B str_to_lines() 0 29 8
A substr_in_byte() 0 18 6
A stripos_in_byte() 0 12 4
A strnatcasecmp() 0 5 1
A substr_left() 0 15 4
D strlen() 0 104 19
A str_isubstr_last() 0 25 4
A to_int() 0 7 2
A str_replace_beginning() 0 24 6
A remove_left() 0 24 4
B stripos() 0 59 11
A str_offset_exists() 0 10 2
D strrchr() 0 104 20
A to_filename() 0 9 1
A str_iends_with() 0 11 3
A max_chr_width() 0 8 2
C utf8_decode() 0 61 13
A ltrim() 0 27 5
A remove_html() 0 3 1
B str_longest_common_suffix() 0 54 10
C wordwrap() 0 72 15
B ucfirst() 0 57 7
A str_pad_both() 0 12 1
A str_index_last() 0 11 1
A str_substr_last() 0 33 6
A mbstring_loaded() 0 3 1
A str_limit() 0 26 6
A toUTF8() 0 3 1
A string() 0 16 4
D normalize_encoding() 0 147 16
B rxClass() 0 45 8
A str_ensure_right() 0 13 4
B str_titleize_for_humans() 0 171 7
A normalize_whitespace() 0 9 1
A str_starts_with() 0 11 3
A str_humanize() 0 15 1
C substr_count_in_byte() 0 55 15
A strchr() 0 13 1
A strichr() 0 13 1
A str_index_first() 0 11 1
A strlen_in_byte() 0 12 3
A str_ireplace_ending() 0 21 6
A rtrim() 0 27 5
C str_longest_common_substring() 0 76 16
A regex_replace() 0 20 3
A titlecase() 0 35 5
A getData() 0 6 1
A str_iindex_first() 0 11 1
B strtolower() 0 60 10
B urldecode() 0 51 8
A str_isubstr_before_first_separator() 0 19 5
B strrev() 0 43 10
A replace_all() 0 11 2
D substr_replace() 0 124 27
A removeBOM() 0 3 1
A strstr_in_byte() 0 15 4
A str_matches_pattern() 0 3 1
C str_titleize() 0 69 12
A str_split_array() 0 17 2
A ws() 0 3 1
A str_replace_first() 0 20 2
A toLatin1() 0 3 1
A str_pad_right() 0 12 1
B ucwords() 0 51 9
A to_boolean() 0 35 5
C stristr() 0 71 15
A strncasecmp() 0 10 1
B strwidth() 0 43 8
A str_iends() 0 3 1
A trim() 0 27 5
A str_upper_camelize() 0 8 1
A substr_compare() 0 33 6
C substr_count() 0 65 16
A strnatcmp() 0 9 2
D str_pad() 0 146 16
A urldecode_unicode_helper() 0 8 2
A str_ireplace() 0 21 3
A to_latin1() 0 3 1
A str_replace_ending() 0 24 6
A string_has_bom() 0 10 3
B strtr() 0 42 11
B str_contains_all() 0 24 9
A normalize_line_ending() 0 3 1
A str_isubstr_after_last_separator() 0 26 5
D range() 0 72 23
B strspn() 0 30 10
A strcasecmp() 0 21 1
A str_transliterate() 0 6 1
B rawurldecode() 0 51 8
A str_ends() 0 3 1
B str_capitalize_name_helper() 0 86 10
A utf8_encode() 0 16 3
A normalize_msword() 0 3 1
C str_detect_encoding() 0 111 13
A spaces_to_tabs() 0 11 3
A str_istarts_with() 0 11 3
A str_replace() 0 18 1
A substr_iright() 0 15 4
A replace() 0 11 2
A words_limit() 0 20 5
A strip_tags() 0 18 4
A pcre_utf8_support() 0 4 1
A str_isubstr_before_last_separator() 0 24 6
D str_truncate_safe() 0 86 18
A substr_right() 0 31 6
D str_split() 0 138 29
A str_ends_with_any() 0 13 4
A strrpos_in_byte() 0 12 4
A remove_right() 0 25 4
F strrpos() 0 122 25
A remove_html_breaks() 0 3 1
A showSupport() 0 17 3
A remove_invisible_characters() 0 9 1
A single_chr_html_encode() 0 18 4
A str_replace_last() 0 19 2
A str_iindex_last() 0 11 1
A str_substr_before_last_separator() 0 31 6
B strtocasefold() 0 33 7
A tabs_to_spaces() 0 11 3
B str_truncate() 0 44 7
D strripos() 0 99 19
A strpos_in_byte() 0 12 4
A str_ends_with() 0 11 3
A to_ascii() 0 6 1
A reduce_string_array() 0 29 6
B str_longest_common_prefix() 0 51 8
A str_pad_left() 0 12 1
A str_substr_first() 0 33 6
A remove_bom() 0 22 5
A str_repeat() 0 5 1
A strpbrk() 0 11 4
D to_utf8_string() 0 110 33
A whitespace_table() 0 3 1
A substr_count_simple() 0 31 6
A str_iends_with_any() 0 13 4
A str_substr_after_last_separator() 0 28 6
A str_isubstr_after_first_separator() 0 26 5
B str_snakeize() 0 57 6
A str_sort() 0 15 3
A ucword() 0 6 1
A str_underscored() 0 3 1
A str_offset_get() 0 14 4
A strip_whitespace() 0 7 2
A toAscii() 0 6 1
A str_ibegins() 0 3 1
A str_capitalize_name() 0 8 1
B str_limit_after_word() 0 53 11
A str_upper_first() 0 13 1
A normalizeEncoding() 0 3 1
A swapCase() 0 17 4
A substr_ileft() 0 15 4
B html_encode() 0 53 11
A str_dasherize() 0 3 1
A str_ensure_left() 0 11 3
B urldecode_fix_win1252_chars() 0 227 1
A toIso8859() 0 3 1
C ord() 0 77 16
B to_string() 0 33 8
A strtonatfold() 0 7 1
C strcspn() 0 52 12
A fixStrCaseHelper() 0 41 5
B str_split_pattern() 0 49 11
D strstr() 0 95 18
A str_isubstr_first() 0 25 4
A str_last_char() 0 16 4
A str_ireplace_beginning() 0 21 6
A str_substr_before_first_separator() 0 32 6
F substr() 0 142 32
A wordwrap_per_line() 0 28 5
A str_surround() 0 3 1
A strncmp() 0 19 4
A str_insert() 0 28 4
A utf8_fix_win1252_chars() 0 3 1
A replace_diamond_question_mark() 0 42 5
D is_utf8_string() 0 134 28
A to_utf8_convert_helper() 0 28 5
B str_delimit() 0 33 8
B strtoupper() 0 60 10
A min() 0 14 3
A str_starts_with_any() 0 17 5
B strrichr() 0 54 11
A split() 0 7 1
A str_istarts_with_any() 0 17 5
B str_contains_any() 0 29 8
A initEmojiData() 0 29 4
B str_slice() 0 33 10
F strpos() 0 137 27
A str_shuffle() 0 35 6
A strcmp() 0 9 2
A str_word_count() 0 23 5
A strripos_in_byte() 0 12 4
A str_to_binary() 0 10 2
A symfony_polyfill_used() 0 16 5
B str_to_words() 0 36 8
A fix_utf8() 0 30 4
A to_iso8859() 0 16 4
A to_utf8() 0 14 3
A remove_duplicates() 0 16 4

How to fix   Complexity   

Complex Class

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * @psalm-immutable
9
 */
10
final class UTF8
11
{
12
    /**
13
     * (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
14
     * This regular expression is a work around for http://bugs.exim.org/1279
15
     *
16
     * @deprecated <p>please don't use it anymore</p>
17
     */
18
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
19
20
    /**
21
     * Bom => Byte-Length
22
     *
23
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
24
     *
25
     * @var array<string, int>
26
     */
27
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        // The UTF-8 BOM bytes mis-read as "WINDOWS-1252" and stored as UTF-8 again (the characters
        // U+00EF U+00BB U+00BF). Written with escapes so the key cannot be silently stripped by
        // editors or text filters; one char may take more than one byte, hence 6 bytes.
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252"
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
39
40
    /**
41
     * Numeric code point => UTF-8 Character
42
     *
43
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
44
     *
45
     * @var array<int, string>
46
     */
47
    private static $WHITESPACE = [
48
        // NULL Byte
49
        0 => "\x0",
50
        // Tab
51
        9 => "\x9",
52
        // New Line
53
        10 => "\xa",
54
        // Vertical Tab
55
        11 => "\xb",
56
        // Carriage Return
57
        13 => "\xd",
58
        // Ordinary Space
59
        32 => "\x20",
60
        // NO-BREAK SPACE
61
        160 => "\xc2\xa0",
62
        // OGHAM SPACE MARK
63
        5760 => "\xe1\x9a\x80",
64
        // MONGOLIAN VOWEL SEPARATOR
65
        6158 => "\xe1\xa0\x8e",
66
        // EN QUAD
67
        8192 => "\xe2\x80\x80",
68
        // EM QUAD
69
        8193 => "\xe2\x80\x81",
70
        // EN SPACE
71
        8194 => "\xe2\x80\x82",
72
        // EM SPACE
73
        8195 => "\xe2\x80\x83",
74
        // THREE-PER-EM SPACE
75
        8196 => "\xe2\x80\x84",
76
        // FOUR-PER-EM SPACE
77
        8197 => "\xe2\x80\x85",
78
        // SIX-PER-EM SPACE
79
        8198 => "\xe2\x80\x86",
80
        // FIGURE SPACE
81
        8199 => "\xe2\x80\x87",
82
        // PUNCTUATION SPACE
83
        8200 => "\xe2\x80\x88",
84
        // THIN SPACE
85
        8201 => "\xe2\x80\x89",
86
        // HAIR SPACE
87
        8202 => "\xe2\x80\x8a",
88
        // LINE SEPARATOR
89
        8232 => "\xe2\x80\xa8",
90
        // PARAGRAPH SEPARATOR
91
        8233 => "\xe2\x80\xa9",
92
        // NARROW NO-BREAK SPACE
93
        8239 => "\xe2\x80\xaf",
94
        // MEDIUM MATHEMATICAL SPACE
95
        8287 => "\xe2\x81\x9f",
96
        // HALFWIDTH HANGUL FILLER
97
        65440 => "\xef\xbe\xa0",
98
        // IDEOGRAPHIC SPACE
99
        12288 => "\xe3\x80\x80",
100
    ];
101
102
    /**
103
     * @var array<string, string>
104
     */
105
    private static $WHITESPACE_TABLE = [
106
        'SPACE'                     => "\x20",
107
        'NO-BREAK SPACE'            => "\xc2\xa0",
108
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
109
        'EN QUAD'                   => "\xe2\x80\x80",
110
        'EM QUAD'                   => "\xe2\x80\x81",
111
        'EN SPACE'                  => "\xe2\x80\x82",
112
        'EM SPACE'                  => "\xe2\x80\x83",
113
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
114
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
115
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
116
        'FIGURE SPACE'              => "\xe2\x80\x87",
117
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
118
        'THIN SPACE'                => "\xe2\x80\x89",
119
        'HAIR SPACE'                => "\xe2\x80\x8a",
120
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
121
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
122
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
123
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
124
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
125
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
126
        'HALFWIDTH HANGUL FILLER'   => "\xef\xbe\xa0",
127
    ];
128
129
    /**
130
     * @var array{upper: string[], lower: string[]}
131
     */
132
    private static $COMMON_CASE_FOLD = [
133
        'upper' => [
134
            'µ',
135
            'ſ',
136
            "\xCD\x85",
137
            'ς',
138
            'ẞ',
139
            "\xCF\x90",
140
            "\xCF\x91",
141
            "\xCF\x95",
142
            "\xCF\x96",
143
            "\xCF\xB0",
144
            "\xCF\xB1",
145
            "\xCF\xB5",
146
            "\xE1\xBA\x9B",
147
            "\xE1\xBE\xBE",
148
        ],
149
        'lower' => [
150
            'μ',
151
            's',
152
            'ι',
153
            'σ',
154
            'ß',
155
            'β',
156
            'θ',
157
            'φ',
158
            'π',
159
            'κ',
160
            'ρ',
161
            'ε',
162
            "\xE1\xB9\xA1",
163
            'ι',
164
        ],
165
    ];
166
167
    /**
168
     * @var array<string, mixed>
169
     */
170
    private static $SUPPORT = [];
171
172
    /**
173
     * @var array<string, string>|null
174
     */
175
    private static $BROKEN_UTF8_FIX;
176
177
    /**
178
     * @var array<int, string>|null
179
     */
180
    private static $WIN1252_TO_UTF8;
181
182
    /**
183
     * @var array<int, string>|null
0 ignored issues
show
Documentation Bug introduced by
The doc comment array<int at position 2 could not be parsed: Expected '>' at position 2, but found 'int'.
Loading history...
184
     */
185
    private static $INTL_TRANSLITERATOR_LIST;
186
187
    /**
188
     * @var array<string>|null
189
     */
190
    private static $ENCODINGS;
191
192
    /**
193
     * @var array<string, int>|null
0 ignored issues
show
Documentation Bug introduced by
The doc comment array<string at position 2 could not be parsed: Expected '>' at position 2, but found 'string'.
Loading history...
194
     */
195
    private static $ORD;
196
197
    /**
198
     * @var array<string, string>|null
199
     */
200
    private static $EMOJI;
201
202
    /**
203
     * @var array<string>|null
204
     */
205
    private static $EMOJI_VALUES_CACHE;
206
207
    /**
208
     * @var array<string>|null
209
     */
210
    private static $EMOJI_KEYS_CACHE;
211
212
    /**
213
     * @var array<string>|null
214
     */
215
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
216
217
    /**
218
     * @var string[]|null
219
     *
220
     * @psalm-var array<int, string>|null
221
     */
222
    private static $CHR;
223
224
    /**
225
     * __construct()
226
     */
227 34
    public function __construct()
    {
        // Intentionally empty: there is nothing to initialise here.
    }
230
231
    /**
232
     * Return the character at the specified position: $str[1] like functionality.
233
     *
234
     * EXAMPLE: <code>UTF8::access('fòô', 1); // 'ò'</code>
235
     *
236
     * @param string $str      <p>A UTF-8 string.</p>
237
     * @param int    $pos      <p>The position of character to return.</p>
238
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
239
     *
240
     * @psalm-pure
241
     *
242
     * @return string
243
     *                <p>Single multi-byte character.</p>
244
     */
245 3
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        // Nothing can be addressed in an empty string or at a negative position.
        $is_addressable = $str !== '' && $pos >= 0;
        if (!$is_addressable) {
            return '';
        }

        // UTF-8 is served directly by mbstring; every other charset goes through self::substr().
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $pos, 1)
            : self::substr($str, $pos, 1, $encoding);

        return (string) $char;
    }
257
258
    /**
259
     * Prepends UTF-8 BOM character to the string and returns the whole string.
260
     *
261
     * INFO: If BOM already existed there, the Input string is returned.
262
     *
263
     * EXAMPLE: <code>UTF8::add_bom_to_string('fòô'); // "\xEF\xBB\xBF" . 'fòô'</code>
264
     *
265
     * @param string $str <p>The input string.</p>
266
     *
267
     * @psalm-pure
268
     *
269
     * @return string
270
     *                <p>The output string that contains BOM.</p>
271
     */
272 2
    public static function add_bom_to_string(string $str): string
    {
        // A string that already starts with a BOM is handed back untouched.
        if (self::string_has_bom($str)) {
            return $str;
        }

        return self::bom() . $str;
    }
280
281
    /**
282
     * Changes all keys in an array.
283
     *
284
     * @param array<string, mixed> $array    <p>The array to work on</p>
285
     * @param int                  $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
286
     *                                       or <strong>CASE_LOWER</strong> (default)</p>
287
     * @param string               $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
288
     *
289
     * @psalm-pure
290
     *
291
     * @return array<string, mixed>
292
     *                  <p>An array with its keys lower- or uppercased.</p>
293
     */
294 2
    public static function array_change_key_case(
        array $array,
        int $case = \CASE_LOWER,
        string $encoding = 'UTF-8'
    ): array {
        // Every value other than CASE_UPPER (including invalid ones) means lower-casing.
        $to_upper = $case === \CASE_UPPER;

        $return = [];
        // Iterate by value: the values are only copied, so a by-reference loop would merely
        // leave a dangling reference behind without changing the result.
        foreach ($array as $key => $value) {
            $key = $to_upper
                ? self::strtoupper((string) $key, $encoding)
                : self::strtolower((string) $key, $encoding);

            // Keys that collapse to the same case-converted key overwrite each other (last wins).
            $return[$key] = $value;
        }

        return $return;
    }
318
319
    /**
320
     * Returns the substring between $start and $end, if found, or an empty
321
     * string. An optional offset may be supplied from which to begin the
322
     * search for the start string.
323
     *
324
     * @param string $str
325
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
326
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
327
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
328
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
329
     *
330
     * @psalm-pure
331
     *
332
     * @return string
333
     */
334 16
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            // Generic path: normalise the charset name once and use the class' own helpers.
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $delimiter_pos = self::strpos($str, $start, $offset, $encoding);
            if ($delimiter_pos === false) {
                return '';
            }

            // The wanted content begins right behind the start delimiter.
            $content_begin = $delimiter_pos + (int) self::strlen($start, $encoding);
            $content_end = self::strpos($str, $end, $content_begin, $encoding);

            // No end delimiter, or nothing between both delimiters.
            if ($content_end === false || $content_end === $content_begin) {
                return '';
            }

            return (string) self::substr($str, $content_begin, $content_end - $content_begin, $encoding);
        }

        // UTF-8 path: same algorithm, served directly by mbstring.
        $delimiter_pos = \mb_strpos($str, $start, $offset);
        if ($delimiter_pos === false) {
            return '';
        }

        $content_begin = $delimiter_pos + (int) \mb_strlen($start);
        $content_end = \mb_strpos($str, $end, $content_begin);

        if ($content_end === false || $content_end === $content_begin) {
            return '';
        }

        return (string) \mb_substr($str, $content_begin, $content_end - $content_begin);
    }
384
385
    /**
386
     * Convert binary into a string.
387
     *
388
     * INFO: opposite to UTF8::str_to_binary()
389
     *
390
     * EXAMPLE: <code>UTF8::binary_to_str('11110000100111111001100010000011'); // '😃'</code>
391
     *
392
     * @param string $bin 1|0
393
     *
394
     * @psalm-pure
395
     *
396
     * @return string
397
     */
398 2
    public static function binary_to_str($bin): string
    {
        // Only a non-empty string can carry binary digits.
        if (!\is_string($bin) || $bin === '') {
            return '';
        }

        // Ignore everything that is not a binary digit, then drop leading zero bits:
        // they carry no value, and a value of zero yields an empty string.
        $bits = \ltrim((string) \preg_replace('/[^01]+/', '', $bin), '0');
        if ($bits === '') {
            return '';
        }

        // Left-pad to whole bytes so that every 8-bit group maps to exactly one byte.
        $missing_bits = (8 - \strlen($bits) % 8) % 8;
        if ($missing_bits > 0) {
            $bits = \str_repeat('0', $missing_bits) . $bits;
        }

        // Decode byte by byte: converting the whole input as one number loses precision
        // on long inputs, and an odd number of hex digits would shift the final byte.
        $str = '';
        foreach (\str_split($bits, 8) as $byte_bits) {
            $str .= \chr((int) \bindec($byte_bits));
        }

        return $str;
    }
411
412
    /**
413
     * Returns the UTF-8 Byte Order Mark Character.
414
     *
415
     * INFO: take a look at UTF8::$BOM for e.g. the UTF-16 and UTF-32 BOM values
416
     *
417
     * EXAMPLE: <code>UTF8::bom(); // "\xEF\xBB\xBF"</code>
418
     *
419
     * @psalm-pure
420
     *
421
     * @return string
422
     *                <p>UTF-8 Byte Order Mark.</p>
423
     */
424 4
    public static function bom(): string
    {
        // The three raw bytes of the UTF-8 byte order mark.
        $utf8_bom = "\xef\xbb\xbf";

        return $utf8_bom;
    }
428
429
    /**
430
     * @alias of UTF8::chr_map()
431
     *
432
     * @param callable $callback
433
     * @param string   $str
434
     *
435
     * @psalm-pure
436
     *
437
     * @return string[]
438
     *
439
     * @see   UTF8::chr_map()
440
     */
441 2
    public static function callback($callback, string $str): array
    {
        // Pure alias: all of the work is done by chr_map().
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
445
446
    /**
447
     * Returns the character at $index, with indexes starting at 0.
448
     *
449
     * @param string $str      <p>The input string.</p>
450
     * @param int    $index    <p>Position of the character.</p>
451
     * @param string $encoding [optional] <p>Default is UTF-8</p>
452
     *
453
     * @psalm-pure
454
     *
455
     * @return string
456
     *                <p>The character at $index.</p>
457
     */
458 9
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Any non-UTF-8 charset is handled by the class' own substr() implementation.
        if ($encoding !== 'UTF-8') {
            return (string) self::substr($str, $index, 1, $encoding);
        }

        // UTF-8: ask mbstring for the single character at $index.
        $char = \mb_substr($str, $index, 1);

        return (string) $char;
    }
466
467
    /**
468
     * Returns an array consisting of the characters in the string.
469
     *
470
     * @param string $str <p>The input string.</p>
471
     *
472
     * @psalm-pure
473
     *
474
     * @return string[]
475
     *                  <p>An array of chars.</p>
476
     */
477 3
    public static function chars(string $str): array
    {
        // str_split() is called with its default arguments and its result is returned as-is.
        /** @var string[] $characters */
        $characters = self::str_split($str);

        return $characters;
    }
482
483
    /**
484
     * This method will auto-detect your server environment for UTF-8 support.
485
     *
486
     * @return true|null
487
     *
488
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
489
     */
490 5
    public static function checkForSupport()
    {
        // The environment is probed on the first call only; later calls report null.
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        // Set the marker first, before any of the probes below run.
        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // mbstring: http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            // Make UTF-8 the default for the "mb_*" and "mb_ereg*" functions.
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // iconv: http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // intl: http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // IntlChar: http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // ctype: http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // finfo: http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // json: http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // pcre: http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        // With the polyfill in use, the "mb_*" default encoding is set to UTF-8 as well.
        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
538
539
    /**
540
     * Generates a UTF-8 encoded character from the given code point.
541
     *
542
     * INFO: opposite to UTF8::ord()
543
     *
544
     * EXAMPLE: <code>UTF8::chr(0x2603); // '☃'</code>
545
     *
546
     * @param int    $code_point <p>The code point for which to generate a character.</p>
547
     * @param string $encoding   [optional] <p>Default is UTF-8</p>
548
     *
549
     * @psalm-pure
550
     *
551
     * @return string|null
552
     *                     <p>Multi-byte character, returns null on failure or empty input.</p>
553
     */
554 21
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // Per-call-site cache of already generated characters, keyed by "<code point>_<encoding>".
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $CHAR_CACHE = [];

        // "UTF-8" and "CP850" are used as-is, everything else is normalised first.
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // Zero and negative code points are reported as failure.
        if ($code_point <= 0) {
            return null;
        }

        $cache_key = $code_point . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // Only single-byte code points (below 0x80) may be served straight from the byte table.
        // U+0080 already needs two bytes (C2 80) in UTF-8; looking it up in the table would
        // return a lone continuation byte, i.e. an invalid UTF-8 string.
        if ($code_point < 0x80) {
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via vanilla php
        //

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        // Build the multi-byte sequence by hand: lead byte first, then 6 payload bits per
        // continuation byte (each offset by 0x80).
        $code_point = (int) $code_point;
        if ($code_point <= 0x7FF) {
            // 2-byte sequence
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } elseif ($code_point <= 0xFFFF) {
            // 3-byte sequence
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } else {
            // 4-byte sequence
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                   self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        }

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cache_key] = $chr;
    }
663
664
    /**
     * Runs a callback over every character of a string and collects the results.
     *
     * EXAMPLE: <code>UTF8::chr_map([UTF8::class, 'strtolower'], 'Κόσμε'); // ['κ','ό', 'σ', 'μ', 'ε']</code>
     *
     * @param callable $callback <p>Invoked once per character, with that character as its argument.</p>
     * @param string   $str      <p>The UTF-8 string whose characters are handed to the callback.</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>The callback results, one entry per character.</p>
     */
    public static function chr_map($callback, string $str): array
    {
        // split first, then let array_map() drive the callback
        $chars = self::str_split($str);

        return \array_map($callback, $chars);
    }
684
685
    /**
     * Returns the byte length of every character of a Unicode string.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * EXAMPLE: <code>UTF8::chr_size_list('中文空白-test'); // [3, 3, 3, 3, 1, 1, 1, 1, 1]</code>
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @psalm-pure
     *
     * @return int[]
     *               <p>The byte length of each character, in order; an empty array for an empty string.</p>
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        // When the mbstring function overload is flagged as active, measure via mb_strlen()
        // with an 8-bit charset ("mb_" is available if overload is used); otherwise use strlen().
        $byte_counter = self::$SUPPORT['mbstring_func_overload'] === true
            ? static function (string $data): int {
                return \mb_strlen($data, 'CP850'); // 8-BIT
            }
        : '\strlen';

        return \array_map($byte_counter, self::str_split($str));
    }
720
721
    /**
     * Get a decimal code representation of a specific character.
     *
     * INFO: opposite to UTF8::decimal_to_chr()
     *
     * EXAMPLE: <code>UTF8::chr_to_decimal('§'); // 0xa7</code>
     *
     * @param string $char <p>The input character; only the first character is evaluated.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The code point of the first character, or 0 for an empty string.</p>
     */
    public static function chr_to_decimal(string $char): int
    {
        // An empty string has no code point. Bail out early instead of letting
        // unpack() fail on empty input or reading $char[0] out of range below.
        if ($char === '') {
            return 0;
        }

        if (self::$SUPPORT['iconv'] === true) {
            $chr_tmp = \iconv('UTF-8', 'UCS-4LE', $char);
            // "V" needs a complete 32-bit little-endian unit, so require 4 bytes.
            if ($chr_tmp !== false && isset($chr_tmp[3])) {
                $unpacked = \unpack('V', $chr_tmp);
                if ($unpacked !== false) {
                    /** @noinspection OffsetOperationsInspection */
                    return $unpacked[1];
                }
            }
        }

        // Fallback: decode the UTF-8 lead byte and its continuation bytes manually.
        $code = self::ord($char[0]);

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        $bytes = 1;
        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        for ($i = 1; $i < $bytes; ++$i) {
            // 10xxxxxx - a missing continuation byte (truncated sequence) contributes
            // no payload bits instead of triggering an out-of-range string offset.
            $continuation = isset($char[$i]) ? self::ord($char[$i]) : 0;
            $code = ($code << 6) + ($continuation & ~0x80);
        }

        return $code;
    }
773
774
    /**
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * EXAMPLE: <code>UTF8::chr_to_hex('§'); // U+00a7</code>
     *
     * @param int|string $char   <p>The input character; it is cast to a string before the lookup.</p>
     * @param string     $prefix [optional] <p>Prefix handed to UTF8::int_to_hex().</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The code point encoded as U+xxxx, or an empty string for empty input.</p>
     */
    public static function chr_to_hex($char, string $prefix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        // the NUL entity is looked up as an empty string
        $lookup = $char === '&#0;' ? '' : (string) $char;

        return self::int_to_hex(self::ord($lookup), $prefix);
    }
799
800
    /**
     * Alias of "UTF8::chr_to_decimal()".
     *
     * @param string $chr <p>The input character.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Whatever UTF8::chr_to_decimal() returns for $chr.</p>
     *
     * @see        UTF8::chr_to_decimal()
     * @deprecated <p>please use "UTF8::chr_to_decimal()"</p>
     */
    public static function chr_to_int(string $chr): int
    {
        $code_point = self::chr_to_decimal($chr);

        return $code_point;
    }
816
817
    /**
     * Splits a string into chunks and joins them with the given line ending.
     *
     * The separator is only placed between chunks, so the result does not end with $end.
     *
     * EXAMPLE: <code>UTF8::chunk_split('ABC-ÖÄÜ-中文空白-κόσμε', 3); // "ABC\r\n-ÖÄ\r\nÜ-中\r\n文空白\r\n-κό\r\nσμε"</code>
     *
     * @param string $body         <p>The original string to be split.</p>
     * @param int    $chunk_length [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end          [optional] <p>The character(s) inserted between the chunks.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The chunked string.</p>
     */
    public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunk_length);

        return \implode($end, $chunks);
    }
835
836
    /**
     * Removes all non-UTF-8 bytes from a string and optionally applies further clean-up steps.
     *
     * The steps run in a fixed order: strip invalid bytes, replace the diamond question mark,
     * remove invisible characters, normalize whitespace, normalize MS Word characters, remove BOM.
     *
     * EXAMPLE: <code>UTF8::clean("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
     *
     * @param string $str                                     <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                              [optional] <p>Set to true, if you need to remove
     *                                                        the UTF-BOM.</p>
     * @param bool   $normalize_whitespace                    [optional] <p>Set to true, if you need to normalize
     *                                                        the whitespace.</p>
     * @param bool   $normalize_msword                        [optional] <p>Set to true, if you need to normalize
     *                                                        MS Word chars e.g.: "…" => "..."</p>
     * @param bool   $keep_non_breaking_space                 [optional] <p>Set to true, to keep
     *                                                        non-breaking-spaces; only used in combination
     *                                                        with $normalize_whitespace.</p>
     * @param bool   $replace_diamond_question_mark           [optional] <p>Set to true, if you need to remove
     *                                                        the diamond question mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters             [optional] <p>Set to false, if you do not want to
     *                                                        remove invisible characters e.g.: "\0"</p>
     * @param bool   $remove_invisible_characters_url_encoded [optional] <p>Set to true, if you also want to
     *                                                        remove invisible url encoded characters
     *                                                        e.g.: "%0B" (forwarded to
     *                                                        UTF8::remove_invisible_characters())<br>
     *                                                        WARNING: maybe contains false-positives
     *                                                        e.g. aa%0Baa -> aaaa.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A clean UTF-8 encoded string.</p>
     *
     * @noinspection PhpTooManyParametersInspection
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true,
        bool $remove_invisible_characters_url_encoded = false
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings
        //
        // Group 1 captures runs of byte sequences shaped like UTF-8 characters; the other
        // two groups match stray bytes, which are dropped because only "$1" is kept.
        $utf8_sequences_pattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        /** @noinspection NotOptimalRegularExpressionsInspection */
        $cleaned = (string) \preg_replace($utf8_sequences_pattern, '$1', $str);

        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned);
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned, $remove_invisible_characters_url_encoded);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        return $remove_bom ? self::remove_bom($cleaned) : $cleaned;
    }
918
919
    /**
     * Clean-up a string so that only printable UTF-8 chars remain + fix UTF-8 encoding.
     *
     * The input is first passed through UTF8::fix_simple_utf8() and then through UTF8::clean()
     * with BOM removal, whitespace normalization (keeping non-breaking spaces) and
     * diamond-question-mark replacement switched on.
     *
     * EXAMPLE: <code>UTF8::cleanup("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf"); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
     *
     * @param string $str <p>The input string (cast to string internally).</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The cleaned string, or an empty string for empty input.</p>
     */
    public static function cleanup($str): string
    {
        $input = (string) $str;
        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $repaired = self::fix_simple_utf8($input);

        return self::clean(
            $repaired,
            true,  // $remove_bom
            true,  // $normalize_whitespace
            false, // $normalize_msword
            true,  // $keep_non_breaking_space
            true   // $replace_diamond_question_mark
        );
    }
956
957
    /**
     * Accepts a string or an array of strings and returns an array of Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * EXAMPLE: <code>
     * UTF8::codepoints('κöñ'); // array(954, 246, 241)
     * // ... OR ...
     * UTF8::codepoints('κöñ', true); // array('U+03ba', 'U+00f6', 'U+00f1')
     * </code>
     *
     * @param string|string[] $arg         <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $use_u_style <p>If true, code points are returned in U+xxxx format;
     *                                     by default they are returned as integers.</p>
     *
     * @psalm-pure
     *
     * @return int[]|string[]
     *                        <p>
     *                        The array of code points:<br>
     *                        int[] for $use_u_style === false<br>
     *                        string[] for $use_u_style === true<br>
     *                        An empty array if $arg is neither a string nor an array, or is empty.
     *                        </p>
     */
    public static function codepoints($arg, bool $use_u_style = false): array
    {
        // a string is split into its characters, anything else is taken as-is
        $chars = \is_string($arg) ? self::str_split($arg) : $arg;

        /**
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_array($chars) || $chars === []) {
            return [];
        }

        $code_points = \array_map([self::class, 'ord'], $chars);
        if (!$use_u_style) {
            return $code_points;
        }

        // int_to_hex() is called with its default prefix here
        return \array_map([self::class, 'int_to_hex'], $code_points);
    }
1018
1019
    /**
     * Trims the string and replaces each run of whitespace characters with a single space.
     *
     * Per the original description this includes tabs and newline characters, as well as
     * multibyte whitespace such as the thin space and ideographic space.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with trimmed $str and condensed whitespace.</p>
     */
    public static function collapse_whitespace(string $str): string
    {
        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $condensed = (string) \mb_ereg_replace('[[:space:]]+', ' ', $str);
        } else {
            // no mbstring: use the class' own regex helper
            $condensed = self::regex_replace($str, '[[:space:]]+', ' ');
        }

        return \trim($condensed);
    }
1040
1041
    /**
     * Returns the number of occurrences of every character used in a string.
     *
     * EXAMPLE: <code>UTF8::count_chars('κaκbκc'); // array('κ' => 3, 'a' => 1, 'b' => 1, 'c' => 1)</code>
     *
     * @param string $str                     <p>The input string.</p>
     * @param bool   $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string
     *                                        (forwarded to UTF8::str_split()).</p>
     * @param bool   $try_to_use_mb_functions [optional] <p>Forwarded to UTF8::str_split() as-is.</p>
     *
     * @psalm-pure
     *
     * @return int[]
     *               <p>An associative array of Character as keys and
     *               their count as values.</p>
     */
    public static function count_chars(
        string $str,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // one array entry per character (chunk length 1)
        $chars = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

        return \array_count_values($chars);
    }
1070
1071
    /**
     * Create a valid CSS identifier for e.g. "class"- or "id"-attributes.
     *
     * EXAMPLE: <code>UTF8::css_identifier('123foo/bar!!!'); // _23foo-bar</code>
     *
     * copy&paste from https://github.com/drupal/core/blob/8.8.x/lib/Drupal/Component/Utility/Html.php#L95
     *
     * @param string   $str         <p>INFO: if no identifier is given e.g. " " or "", we will create a unique string automatically</p>
     * @param string[] $filter      <p>Map of search => replacement strings applied via str_replace().</p>
     * @param bool     $stripe_tags <p>If true, strip_tags() is applied to the string.</p>
     * @param bool     $strtolower  <p>If true, strtolower() is applied to the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-param array<string,string> $filter
     */
    public static function css_identifier(
        string $str = '',
        array $filter = [
            ' ' => '-',
            '/' => '-',
            '[' => '',
            ']' => '',
        ],
        bool $stripe_tags = false,
        bool $strtolower = true
    ): string {
        // Fallback: blank input is replaced by an auto-generated unique string,
        // everything else goes through clean() first.
        $str = \trim($str) === ''
            ? \uniqid('auto-generated-css-class', true)
            : self::clean($str);

        if ($stripe_tags) {
            $str = \strip_tags($str);
        }

        if ($strtolower) {
            $str = \strtolower($str);
        }

        // We could also use strtr() here but its much slower than str_replace(). In
        // order to keep '__' to stay '__' we first replace it with a different
        // placeholder after checking that it is not defined as a filter.
        $placeholder_hits = 0;
        if (!isset($filter['__'])) {
            $str = \str_replace('__', '##', $str, $placeholder_hits);
        }

        /* @noinspection ArrayValuesMissUseInspection */
        $str = \str_replace(\array_keys($filter), \array_values($filter), $str);

        // Replace temporary placeholder '##' with '__' only if the original
        // identifier contained '__'.
        if ($placeholder_hits > 0) {
            $str = \str_replace('##', '__', $str);
        }

        // Valid characters in a CSS identifier are:
        // - the hyphen (U+002D)
        // - 0-9 (U+0030 - U+0039)
        // - A-Z (U+0041 - U+005A)
        // - the underscore (U+005F)
        // - a-z (U+0061 - U+007A)
        // - ISO 10646 characters U+00A1 and higher
        // We strip out any character not in the above list.
        $disallowed_chars = '/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u';
        $str = (string) \preg_replace($disallowed_chars, '', $str);

        // Identifiers cannot start with a digit, two hyphens, or a hyphen followed by a digit.
        $invalid_starts = ['/^[0-9]/', '/^(-[0-9])|^(--)/'];
        $start_replacements = ['_', '__'];
        $str = (string) \preg_replace($invalid_starts, $start_replacements, $str);

        return \trim($str, '-');
    }
1146
1147
    /**
     * Remove css media-queries.
     *
     * Every "@media ... { ... }" block matched by the internal pattern is replaced
     * by an empty string.
     *
     * @param string $str <p>The CSS source.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The CSS without the matched media-query blocks.</p>
     */
    public static function css_stripe_media_queries(string $str): string
    {
        $media_query_pattern = '#@media\\s+(?:only\\s)?(?:[\\s{(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $without_media_queries = \preg_replace($media_query_pattern, '', $str);

        return (string) $without_media_queries;
    }
1164
1165
    /**
     * Checks whether the "ctype" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function ctype_loaded(): bool
    {
        $is_loaded = \extension_loaded('ctype');

        return $is_loaded;
    }
1177
1178
    /**
     * Converts an int value into a UTF-8 character.
     *
     * The value is wrapped into a numeric HTML entity ("&#...;") and decoded
     * via UTF8::html_entity_decode().
     *
     * INFO: opposite to UTF8::chr_to_decimal()
     *
     * EXAMPLE: <code>UTF8::decimal_to_chr(931); // 'Σ'</code>
     *
     * @param int|string $int
     *
     * @psalm-param int|numeric-string $int
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        $numeric_entity = '&#' . $int . ';';

        return self::html_entity_decode($numeric_entity, \ENT_QUOTES | \ENT_HTML5);
    }
1197
1198
    /**
     * Decodes a MIME header field.
     *
     * Decoding is done in "continue on error" mode.
     *
     * @param string $str      <p>The encoded header field.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A decoded MIME field on success,
     *                      or false if an error occurs during the decoding.</p>
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        // the two most common charsets are used as given, everything else is normalized
        $is_known_charset = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_charset) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // always fallback via symfony polyfill
        return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
    }
1219
1220
    /**
     * Convert any two-letter country code (ISO 3166-1) to the corresponding Emoji.
     *
     * Each letter is mapped onto the matching "regional indicator symbol"
     * (offset 0x1F1E6 for the letter "A"); the two symbols together form the flag.
     *
     * @see https://en.wikipedia.org/wiki/ISO_3166-1
     *
     * @param string $country_code_iso_3166_1 <p>e.g. DE (case-insensitive)</p>
     *
     * @return string
     *                <p>Emoji or empty string on error, i.e. if the input does not consist
     *                of exactly two ASCII letters.</p>
     */
    public static function emoji_from_country_code(string $country_code_iso_3166_1): string
    {
        // Only two ASCII letters can be mapped. A pure length check would let digits or
        // multibyte characters through, which are then indexed byte-wise below and end up
        // as meaningless code points instead of the documented empty string.
        if (\preg_match('/\A[A-Za-z]{2}\z/', $country_code_iso_3166_1) !== 1) {
            return '';
        }

        $country_code_iso_3166_1 = \strtoupper($country_code_iso_3166_1);

        $flagOffset = 0x1F1E6; // code point used for the letter "A"
        $asciiOffset = 0x41; // ASCII "A"

        return (self::chr((self::ord($country_code_iso_3166_1[0]) - $asciiOffset + $flagOffset)) ?? '') .
               (self::chr((self::ord($country_code_iso_3166_1[1]) - $asciiOffset + $flagOffset)) ?? '');
    }
1248
1249
    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * INFO: opposite to UTF8::emoji_encode()
     *
     * EXAMPLE: <code>
     * UTF8::emoji_decode('foo CHARACTER_OGRE', false); // 'foo 👹'
     * //
     * UTF8::emoji_decode('foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_', true); // 'foo 👹'
     * </code>
     *
     * @param string $str                            <p>The input string.</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               When <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode".</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function emoji_decode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        self::initEmojiData();

        // pick the key set that matches the mapping used while encoding
        $encoded_keys = $use_reversible_string_mappings
            ? (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : (array) self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace($encoded_keys, (array) self::$EMOJI_VALUES_CACHE, $str);
    }
1289
1290
    /**
     * Encode a string with emoji chars into a non-emoji string.
     *
     * INFO: opposite to UTF8::emoji_decode()
     *
     * EXAMPLE: <code>
     * UTF8::emoji_encode('foo 👹', false)); // 'foo CHARACTER_OGRE'
     * //
     * UTF8::emoji_encode('foo 👹', true)); // 'foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_'
     * </code>
     *
     * @param string $str                            <p>The input string</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               when <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode"</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function emoji_encode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        self::initEmojiData();

        // the emoji values are always the search set; only the replacement keys differ
        $replacement_keys = $use_reversible_string_mappings
            ? (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : (array) self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace((array) self::$EMOJI_VALUES_CACHE, $replacement_keys, $str);
    }
1330
1331
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  This function will also try to fix broken / double encoding,
     *        so you can call this function also on a UTF-8 string and you don't mess up the string.
     *
     * EXAMPLE: <code>
     * UTF8::encode('ISO-8859-1', '-ABC-中文空白-'); // '-ABC-????-'
     * //
     * UTF8::encode('UTF-8', '-ABC-中文空白-'); // '-ABC-中文空白-'
     * //
     * UTF8::encode('HTML', '-ABC-中文空白-'); // '-ABC-&#20013;&#25991;&#31354;&#30333;-'
     * //
     * UTF8::encode('BASE64', '-ABC-中文空白-'); // 'LUFCQy3kuK3mlofnqbrnmb0t'
     * </code>
     *
     * @param string $to_encoding                   <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                           <p>The input string</p>
     * @param bool   $auto_detect_the_from_encoding [optional] <p>When true (or when $from_encoding is empty),
     *                                              the current string-encoding is auto-detected and a detected
     *                                              encoding overrides $from_encoding. If detection fails in
     *                                              auto-detect mode, the result of UTF8::to_utf8() is
     *                                              returned.</p>
     * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                              A empty string will trigger the autodetect anyway.</p>
     *
     * @throws \InvalidArgumentException <p>If $to_encoding is "JSON" and the string can not be JSON-encoded.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The re-encoded string; the input is returned unchanged if it is empty, if no
     *                target encoding is given, or if no conversion step succeeds.</p>
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $to_encoding,
        string $str,
        bool $auto_detect_the_from_encoding = true,
        string $from_encoding = ''
    ): string {
        if ($str === '' || $to_encoding === '') {
            return $str;
        }

        if ($to_encoding !== 'UTF-8' && $to_encoding !== 'CP850') {
            $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
        }

        if ($from_encoding && $from_encoding !== 'UTF-8' && $from_encoding !== 'CP850') {
            $from_encoding = self::normalize_encoding($from_encoding);
        }

        // nothing to do if source and target are explicitly the same
        if (
            $to_encoding
            &&
            $from_encoding
            &&
            $from_encoding === $to_encoding
        ) {
            return $str;
        }

        if ($to_encoding === 'JSON') {
            $return = self::json_encode($str);
            if ($return === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $return;
        }
        if ($from_encoding === 'JSON') {
            $str = self::json_decode($str);
            $from_encoding = '';
        }

        if ($to_encoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($from_encoding === 'BASE64') {
            // Strict decoding returns false for input that is not valid base64. Keep the
            // original string in that case instead of passing "false" on to the
            // string-based helpers below.
            $decoded = \base64_decode($str, true);
            if ($decoded !== false) {
                $str = $decoded;
            }
            $from_encoding = '';
        }

        if ($to_encoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true);
        }
        if ($from_encoding === 'HTML-ENTITIES') {
            $str = self::html_entity_decode($str, \ENT_COMPAT);
            $from_encoding = '';
        }

        $from_encoding_auto_detected = false;
        if (
            $auto_detect_the_from_encoding
            ||
            !$from_encoding
        ) {
            $from_encoding_auto_detected = self::str_detect_encoding($str);
        }

        // DEBUG
        //var_dump($to_encoding, $from_encoding, $from_encoding_auto_detected, $str, "\n\n");

        if ($from_encoding_auto_detected !== false) {
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $from_encoding = $from_encoding_auto_detected;
        } elseif ($auto_detect_the_from_encoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (
            !$from_encoding
            ||
            $from_encoding === $to_encoding
        ) {
            return $str;
        }

        // dedicated converters for the most common conversions
        if (
            $to_encoding === 'UTF-8'
            &&
            (
                $from_encoding === 'WINDOWS-1252'
                ||
                $from_encoding === 'ISO-8859-1'
            )
        ) {
            return self::to_utf8($str);
        }

        if (
            $to_encoding === 'ISO-8859-1'
            &&
            (
                $from_encoding === 'WINDOWS-1252'
                ||
                $from_encoding === 'UTF-8'
            )
        ) {
            return self::to_iso8859($str);
        }

        if (
            $to_encoding !== 'UTF-8'
            &&
            $to_encoding !== 'ISO-8859-1'
            &&
            $to_encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // warning: do not use the symfony polyfill here
            $str_encoded = \mb_convert_encoding(
                $str,
                $to_encoding,
                $from_encoding
            );

            if ($str_encoded) {
                return $str_encoded;
            }
        }

        // last resort: iconv, falling back to the unconverted string if that fails as well
        /** @noinspection PhpUsageOfSilenceOperatorInspection - Detected an incomplete multibyte character in input string */
        $return = @\iconv($from_encoding, $to_encoding, $str);
        if ($return !== false) {
            return $return;
        }

        return $str;
    }
1506
1507
    /**
1508
     * @param string $str
1509
     * @param string $from_charset      [optional] <p>Set the input charset.</p>
1510
     * @param string $to_charset        [optional] <p>Set the output charset.</p>
1511
     * @param string $transfer_encoding [optional] <p>Set the transfer encoding.</p>
1512
     * @param string $linefeed          [optional] <p>Set the used linefeed.</p>
1513
     * @param int    $indent            [optional] <p>Set the max length indent.</p>
1514
     *
1515
     * @psalm-pure
1516
     *
1517
     * @return false|string
1518
     *                      <p>An encoded MIME field on success,
1519
     *                      or false if an error occurs during the encoding.</p>
1520
     */
1521 1
    public static function encode_mimeheader(
        string $str,
        string $from_charset = 'UTF-8',
        string $to_charset = 'UTF-8',
        string $transfer_encoding = 'Q',
        string $linefeed = "\r\n",
        int $indent = 76
    ) {
        // 'UTF-8' and 'CP850' are used as given; every other charset name is
        // first passed through normalize_encoding() with 'UTF-8' as second argument.
        $passthrough_charsets = ['UTF-8', 'CP850'];

        if (!\in_array($from_charset, $passthrough_charsets, true)) {
            $from_charset = self::normalize_encoding($from_charset, 'UTF-8');
        }

        if (!\in_array($to_charset, $passthrough_charsets, true)) {
            $to_charset = self::normalize_encoding($to_charset, 'UTF-8');
        }

        // Preferences array in the shape expected by iconv_mime_encode().
        $preferences = [
            'scheme'           => $transfer_encoding,
            'line-length'      => $indent,
            'input-charset'    => $from_charset,
            'output-charset'   => $to_charset,
            'line-break-chars' => $linefeed,
        ];

        // The field name is intentionally empty; only the value is encoded.
        // (original note: always fallback via symfony polyfill)
        return \iconv_mime_encode('', $str, $preferences);
    }
1550
1551
    /**
1552
     * Create an extract from a sentence; if the search-string is found, the extract tries to center it in the output.
1553
     *
1554
     * @param string   $str                       <p>The input string.</p>
1555
     * @param string   $search                    <p>The searched string.</p>
1556
     * @param int|null $length                    [optional] <p>Default: null === text->length / 2</p>
1557
     * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
1558
     * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
1559
     *
1560
     * @psalm-pure
1561
     *
1562
     * @return string
1563
     */
1564 1
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacer_for_skipped_text = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // UTF-8 input goes directly to the mb_* functions; any other encoding
        // is routed through this class' encoding-aware wrappers.
        $is_utf8 = $encoding === 'UTF-8';

        // Characters stripped from the cut edges of the extract.
        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        // Default length: half of the input length.
        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2);
        }

        // NOTE(review): throughout this method min()/max() receive the raw
        // strpos()/strrpos() results, which can be false when nothing is found;
        // the (int) cast then turns such a result into 0 — confirm this is intended.

        // ---- no search term: cut from the start of the string ----
        if ($search === '') {
            $scan_from = 0;
            if ($length > 0) {
                $char_count = (int) ($is_utf8 ? \mb_strlen($str) : self::strlen($str, $encoding));
                $scan_from = \min($length - 1, $char_count);
            }

            $cut_at = (int) (
                $is_utf8
                    ? \min(
                        \mb_strpos($str, ' ', $scan_from),
                        \mb_strpos($str, '.', $scan_from)
                    )
                    : \min(
                        self::strpos($str, ' ', $scan_from, $encoding),
                        self::strpos($str, '.', $scan_from, $encoding)
                    )
            );

            if (!$cut_at) {
                return $str;
            }

            $head = $is_utf8
                ? \mb_substr($str, 0, $cut_at)
                : self::substr($str, 0, $cut_at, $encoding);

            if ($head === false) {
                return '';
            }

            return \rtrim($head, $trim_chars) . $replacer_for_skipped_text;
        }

        // ---- search term given: try to place the match in the middle ----
        $match_pos = (int) ($is_utf8 ? \mb_stripos($str, $search) : self::stripos($str, $search, 0, $encoding));
        $search_len = (int) ($is_utf8 ? \mb_strlen($search) : self::strlen($search, $encoding));
        $left_budget = (int) ($match_pos - $length / 2 + $search_len / 2);

        // Start of the window: last space or dot found in the text before the match.
        $window_start = 0;
        if ($left_budget > 0) {
            $prefix = $is_utf8
                ? \mb_substr($str, 0, $left_budget)
                : self::substr($str, 0, $left_budget, $encoding);

            if ($prefix !== false) {
                $window_start = (int) (
                    $is_utf8
                        ? \max(
                            \mb_strrpos($prefix, ' '),
                            \mb_strrpos($prefix, '.')
                        )
                        : \max(
                            self::strrpos($prefix, ' ', 0, $encoding),
                            self::strrpos($prefix, '.', 0, $encoding)
                        )
                );
            }
        }

        $total_chars = (int) self::strlen($str, $encoding);

        if ($match_pos && $left_budget > 0) {
            // The search offset is clamped to the string length.
            $scan_from = \min($window_start + $length - 1, $total_chars);

            $window_len = (int) (
                $is_utf8
                    ? \min(
                        \mb_strpos($str, ' ', $scan_from),
                        \mb_strpos($str, '.', $scan_from)
                    )
                    : \min(
                        self::strpos($str, ' ', $scan_from, $encoding),
                        self::strpos($str, '.', $scan_from, $encoding)
                    )
            ) - $window_start;

            if ($window_len <= 0) {
                // No usable end position: take everything from the window start.
                $tail = $is_utf8
                    ? \mb_substr($str, $window_start, (int) \mb_strlen($str))
                    : self::substr($str, $window_start, (int) self::strlen($str, $encoding), $encoding);

                return $tail !== false
                    ? $replacer_for_skipped_text . \ltrim($tail, $trim_chars)
                    : '';
            }

            $middle = $is_utf8
                ? \mb_substr($str, $window_start, $window_len)
                : self::substr($str, $window_start, $window_len, $encoding);

            return $middle !== false
                ? $replacer_for_skipped_text . \trim($middle, $trim_chars) . $replacer_for_skipped_text
                : '';
        }

        // Match position is 0 (or not found) or there is no room on the left:
        // cut from the start of the string.
        $scan_from = \min($length - 1, $total_chars);

        $cut_at = (int) (
            $is_utf8
                ? \min(
                    \mb_strpos($str, ' ', $scan_from),
                    \mb_strpos($str, '.', $scan_from)
                )
                : \min(
                    self::strpos($str, ' ', $scan_from, $encoding),
                    self::strpos($str, '.', $scan_from, $encoding)
                )
        );

        if (!$cut_at) {
            return $str;
        }

        $head = $is_utf8
            ? \mb_substr($str, 0, $cut_at)
            : self::substr($str, 0, $cut_at, $encoding);

        return $head !== false
            ? \rtrim($head, $trim_chars) . $replacer_for_skipped_text
            : '';
    }
1740
1741
    /**
1742
     * Reads entire file into a string.
1743
     *
1744
     * EXAMPLE: <code>UTF8::file_get_contents('utf16le.txt'); // ...</code>
1745
     *
1746
     * WARNING: Do not use UTF-8 Option ($convert_to_utf8) for binary files (e.g.: images) !!!
1747
     *
1748
     * @see http://php.net/manual/en/function.file-get-contents.php
1749
     *
1750
     * @param string        $filename         <p>
1751
     *                                        Name of the file to read.
1752
     *                                        </p>
1753
     * @param bool          $use_include_path [optional] <p>
1754
     *                                        Prior to PHP 5, this parameter is called
1755
     *                                        use_include_path and is a bool.
1756
     *                                        As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1757
     *                                        to trigger include path
1758
     *                                        search.
1759
     *                                        </p>
1760
     * @param resource|null $context          [optional] <p>
1761
     *                                        A valid context resource created with
1762
     *                                        stream_context_create. If you don't need to use a
1763
     *                                        custom context, you can skip this parameter by &null;.
1764
     *                                        </p>
1765
     * @param int|null      $offset           [optional] <p>
1766
     *                                        The offset where the reading starts.
1767
     *                                        </p>
1768
     * @param int|null      $max_length       [optional] <p>
1769
     *                                        Maximum length of data read. The default is to read until end
1770
     *                                        of file is reached.
1771
     *                                        </p>
1772
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
1773
     * @param bool          $convert_to_utf8  <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
1774
     *                                        some files, because they used non default utf-8 chars. Binary files
1775
     *                                        like images or pdf will not be converted.</p>
1776
     * @param string        $from_encoding    [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
1777
     *                                        An empty string will trigger the autodetect anyway.</p>
1778
     *
1779
     * @psalm-pure
1780
     *
1781
     * @return false|string
1782
     *                      <p>The function returns the read data as string or <b>false</b> on failure.</p>
1783
     *
1784
     * @noinspection PhpTooManyParametersInspection
1785
     */
1786 12
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $max_length = null,
        int $timeout = 10,
        bool $convert_to_utf8 = true,
        string $from_encoding = ''
    ) {
        // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 and
        // rewrites characters such as quotes and tags in the name — confirm
        // that this sanitizing of the path is still wanted.
        /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
        $sanitized_name = \filter_var($filename, \FILTER_SANITIZE_STRING);
        if ($sanitized_name === false) {
            return false;
        }

        // Only build a stream context when the caller supplied none and a
        // non-zero timeout was requested.
        if ($timeout && $context === null) {
            $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
        }

        $start = $offset ?? 0;

        // The length argument is only forwarded when it is a real integer.
        $raw = \is_int($max_length)
            ? \file_get_contents($sanitized_name, $use_include_path, $context, $start, $max_length)
            : \file_get_contents($sanitized_name, $use_include_path, $context, $start);

        // return false on error
        if ($raw === false) {
            return false;
        }

        if (!$convert_to_utf8) {
            return $raw;
        }

        // Convert when the data is not classified as binary, or when it is
        // detected as UTF-16 / UTF-32.
        $should_convert = !self::is_binary($raw, true)
            || self::is_utf16($raw, false) !== false
            || self::is_utf32($raw, false) !== false;

        if (!$should_convert) {
            return $raw;
        }

        return self::cleanup(
            self::encode('UTF-8', $raw, false, $from_encoding)
        );
    }
1843
1844
    /**
1845
     * Checks if a file starts with BOM (Byte Order Mark) character.
1846
     *
1847
     * EXAMPLE: <code>UTF8::file_has_bom('utf8_with_bom.txt'); // true</code>
1848
     *
1849
     * @param string $file_path <p>Path to a valid file.</p>
1850
     *
1851
     * @throws \RuntimeException if file_get_contents() returned false
1852
     *
1853
     * @return bool
1854
     *              <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p>
1855
     *
1856
     * @psalm-pure
1857
     */
1858 2
    public static function file_has_bom(string $file_path): bool
    {
        // The whole file is loaded; the BOM check itself is delegated to string_has_bom().
        $contents = \file_get_contents($file_path);

        if ($contents !== false) {
            return self::string_has_bom($contents);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1867
1868
    /**
1869
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1870
     *
1871
     * EXAMPLE: <code>UTF8::filter(array("\xE9", 'à', 'a')); // array('é', 'à', 'a')</code>
1872
     *
1873
     * @param array|object|string $var
1874
     * @param int                 $normalization_form
1875
     * @param string              $leading_combining
1876
     *
1877
     * @psalm-pure
1878
     *
1879
     * @return mixed
1880
     *
1881
     * @template TFilter
1882
     * @psalm-param TFilter $var
1883
     * @psalm-return TFilter
1884
     */
1885 65
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        $type = \gettype($var);

        if ($type === 'object' || $type === 'array') {
            // Recurse into every element / iterable property, writing results back in place.
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            unset($item);
        } elseif ($type === 'string') {
            if (\strpos($var, "\r") !== false) {
                $var = self::normalize_line_ending($var);
            }

            if (!ASCII::is_ascii($var)) {
                // '-' is a non-empty marker meaning "the input was already normalized".
                $normalized = '-';

                if (!\Normalizer::isNormalized($var, $normalization_form)) {
                    $normalized = \Normalizer::normalize($var, $normalization_form);

                    // An empty / failed normalization result falls back to
                    // re-encoding the input as UTF-8.
                    $var = isset($normalized[0])
                        ? $normalized
                        : self::encode('UTF-8', $var);
                }

                \assert(\is_string($var));

                $starts_with_combining_mark = $var[0] >= "\x80"
                    && isset($normalized[0], $leading_combining[0])
                    && \preg_match('/^\\p{Mn}/u', $var);

                if ($starts_with_combining_mark) {
                    // Prevent leading combining chars
                    // for NFC-safe concatenations.
                    $var = $leading_combining . $var;
                }
            }
        }
        // every other type is returned untouched

        /** @noinspection PhpSillyAssignmentInspection */
        /** @psalm-var TFilter $var */
        $var = $var;

        return $var;
    }
1943
1944
    /**
1945
     * "filter_input()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1946
     *
1947
     * Gets a specific external variable by name and optionally filters it.
1948
     *
1949
     * EXAMPLE: <code>
1950
     * // _GET['foo'] = 'bar';
1951
     * UTF8::filter_input(INPUT_GET, 'foo', FILTER_SANITIZE_STRING); // 'bar'
1952
     * </code>
1953
     *
1954
     * @see http://php.net/manual/en/function.filter-input.php
1955
     *
1956
     * @param int       $type          <p>
1957
     *                                 One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1958
     *                                 <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1959
     *                                 <b>INPUT_ENV</b>.
1960
     *                                 </p>
1961
     * @param string    $variable_name <p>
1962
     *                                 Name of a variable to get.
1963
     *                                 </p>
1964
     * @param int       $filter        [optional] <p>
1965
     *                                 The ID of the filter to apply. The
1966
     *                                 manual page lists the available filters.
1967
     *                                 </p>
1968
     * @param array|int $options       [optional] <p>
1969
     *                                 Associative array of options or bitwise disjunction of flags. If filter
1970
     *                                 accepts options, flags can be provided in "flags" field of array.
1971
     *                                 </p>
1972
     *
1973
     * @psalm-pure
1974
     *
1975
     * @return mixed
1976
     *               <p>
1977
     *               Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
1978
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1979
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1980
     *               </p>
1981
     */
1982 1
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $options_given = $options !== null && \func_num_args() >= 4;

        // The native function only receives $options when the caller really supplied some.
        $raw = $options_given
            ? \filter_input($type, $variable_name, $filter, $options)
            : \filter_input($type, $variable_name, $filter);

        return self::filter($raw);
    }
1999
2000
    /**
2001
     * "filter_input_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
2002
     *
2003
     * Gets external variables and optionally filters them.
2004
     *
2005
     * EXAMPLE: <code>
2006
     * // _GET['foo'] = 'bar';
2007
     * UTF8::filter_input_array(INPUT_GET, array('foo' => FILTER_SANITIZE_STRING)); // array('foo' => 'bar')
2008
     * </code>
2009
     *
2010
     * @see http://php.net/manual/en/function.filter-input-array.php
2011
     *
2012
     * @param int        $type       <p>
2013
     *                               One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
2014
     *                               <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
2015
     *                               <b>INPUT_ENV</b>.
2016
     *                               </p>
2017
     * @param array|null $definition [optional] <p>
2018
     *                               An array defining the arguments. A valid key is a string
2019
     *                               containing a variable name and a valid value is either a filter type, or an array
2020
     *                               optionally specifying the filter, flags and options. If the value is an
2021
     *                               array, valid keys are filter which specifies the
2022
     *                               filter type,
2023
     *                               flags which specifies any flags that apply to the
2024
     *                               filter, and options which specifies any options that
2025
     *                               apply to the filter. See the example below for a better understanding.
2026
     *                               </p>
2027
     *                               <p>
2028
     *                               This parameter can be also an integer holding a filter constant. Then all values in the
2029
     *                               input array are filtered by this filter.
2030
     *                               </p>
2031
     * @param bool       $add_empty  [optional] <p>
2032
     *                               Add missing keys as <b>NULL</b> to the return value.
2033
     *                               </p>
2034
     *
2035
     * @psalm-pure
2036
     *
2037
     * @return mixed
2038
     *               <p>
2039
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
2040
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
2041
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
2042
     *               is not set and <b>NULL</b> if the filter fails.
2043
     *               </p>
2044
     */
2045 1
    public static function filter_input_array(
        int $type,
        $definition = null,
        bool $add_empty = true
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $definition_given = $definition !== null && \func_num_args() >= 2;

        // Without a definition the native function is called with the input type only.
        $values = $definition_given
            ? \filter_input_array($type, $definition, $add_empty)
            : \filter_input_array($type);

        return self::filter($values);
    }
2061
2062
    /**
2063
     * "filter_var()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
2064
     *
2065
     * Filters a variable with a specified filter.
2066
     *
2067
     * EXAMPLE: <code>UTF8::filter_var('-ABC-中文空白-', FILTER_VALIDATE_URL); // false</code>
2068
     *
2069
     * @see http://php.net/manual/en/function.filter-var.php
2070
     *
2071
     * @param float|int|string|null $variable <p>
2072
     *                                        Value to filter.
2073
     *                                        </p>
2074
     * @param int                   $filter   [optional] <p>
2075
     *                                        The ID of the filter to apply. The
2076
     *                                        manual page lists the available filters.
2077
     *                                        </p>
2078
     * @param array|int             $options  [optional] <p>
2079
     *                                        Associative array of options or bitwise disjunction of flags. If filter
2080
     *                                        accepts options, flags can be provided in "flags" field of array. For
2081
     *                                        the "callback" filter, callable type should be passed. The
2082
     *                                        callback must accept one argument, the value to be filtered, and return
2083
     *                                        the value after filtering/sanitizing it.
2084
     *                                        </p>
2085
     *                                        <p>
2086
     *                                        <code>
2087
     *                                        // for filters that accept options, use this format
2088
     *                                        $options = array(
2089
     *                                        'options' => array(
2090
     *                                        'default' => 3, // value to return if the filter fails
2091
     *                                        // other options here
2092
     *                                        'min_range' => 0
2093
     *                                        ),
2094
     *                                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
2095
     *                                        );
2096
     *                                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
2097
     *                                        // for filter that only accept flags, you can pass them directly
2098
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
2099
     *                                        // for filter that only accept flags, you can also pass as an array
2100
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
2101
     *                                        array('flags' => FILTER_NULL_ON_FAILURE));
2102
     *                                        // callback validate filter
2103
     *                                        function foo($value)
2104
     *                                        {
2105
     *                                        // Expected format: Surname, GivenNames
2106
     *                                        if (strpos($value, ", ") === false) return false;
2107
     *                                        list($surname, $givennames) = explode(", ", $value, 2);
2108
     *                                        $empty = (empty($surname) || empty($givennames));
2109
     *                                        $notstrings = (!is_string($surname) || !is_string($givennames));
2110
     *                                        if ($empty || $notstrings) {
2111
     *                                        return false;
2112
     *                                        } else {
2113
     *                                        return $value;
2114
     *                                        }
2115
     *                                        }
2116
     *                                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
2117
     *                                        </code>
2118
     *                                        </p>
2119
     *
2120
     * @psalm-pure
2121
     *
2122
     * @return mixed
2123
     *               <p>The filtered data, or <b>FALSE</b> if the filter fails.</p>
2124
     */
2125 2
    public static function filter_var(
        $variable,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        // An explicit null for $options is treated like an omitted argument,
        // matching filter_input(): the native \filter_var() expects array|int
        // there, so forwarding null is deprecated on PHP 8.1+ and a TypeError
        // under strict types.
        if ($options === null || \func_num_args() < 3) {
            $variable = \filter_var($variable, $filter);
        } else {
            $variable = \filter_var($variable, $filter, $options);
        }

        // Normalize the filtered result (strings, and strings nested in arrays/objects).
        return self::filter($variable);
    }
2141
2142
    /**
2143
     * "filter_var_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
2144
     *
2145
     * Gets multiple variables and optionally filters them.
2146
     *
2147
     * EXAMPLE: <code>
2148
     * $filters = [
2149
     *     'name'  => ['filter'  => FILTER_CALLBACK, 'options' => [UTF8::class, 'ucwords']],
2150
     *     'age'   => ['filter'  => FILTER_VALIDATE_INT, 'options' => ['min_range' => 1, 'max_range' => 120]],
2151
     *     'email' => FILTER_VALIDATE_EMAIL,
2152
     * ];
2153
     *
2154
     * $data = [
2155
     *     'name' => 'κόσμε',
2156
     *     'age' => '18',
2157
     *     'email' => 'foo@bar.de'
2158
     * ];
2159
     *
2160
     * UTF8::filter_var_array($data, $filters, true); // ['name' => 'Κόσμε', 'age' => 18, 'email' => 'foo@bar.de']
2161
     * </code>
2162
     *
2163
     * @see http://php.net/manual/en/function.filter-var-array.php
2164
     *
2165
     * @param array<mixed> $data       <p>
2166
     *                                 An array with string keys containing the data to filter.
2167
     *                                 </p>
2168
     * @param array|int    $definition [optional] <p>
2169
     *                                 An array defining the arguments. A valid key is a string
2170
     *                                 containing a variable name and a valid value is either a
2171
     *                                 filter type, or an
2172
     *                                 array optionally specifying the filter, flags and options.
2173
     *                                 If the value is an array, valid keys are filter
2174
     *                                 which specifies the filter type,
2175
     *                                 flags which specifies any flags that apply to the
2176
     *                                 filter, and options which specifies any options that
2177
     *                                 apply to the filter. See the example below for a better understanding.
2178
     *                                 </p>
2179
     *                                 <p>
2180
     *                                 This parameter can be also an integer holding a filter constant. Then all values
2181
     *                                 in the input array are filtered by this filter.
2182
     *                                 </p>
2183
     * @param bool         $add_empty  [optional] <p>
2184
     *                                 Add missing keys as <b>NULL</b> to the return value.
2185
     *                                 </p>
2186
     *
2187
     * @psalm-pure
2188
     *
2189
     * @return mixed
2190
     *               <p>
2191
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
2192
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
2193
     *               set.
2194
     *               </p>
2195
     */
2196 2
    public static function filter_var_array(
        array $data,
        $definition = null,
        bool $add_empty = true
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $only_data_given = \func_num_args() < 2;

        // When the caller passed nothing but the data, let PHP apply its own
        // defaults instead of forwarding our placeholder values.
        $filtered = $only_data_given
            ? \filter_var_array($data)
            : \filter_var_array($data, $definition, $add_empty);

        // Hand the native result to self::filter() for post-processing.
        return self::filter($filtered);
    }
2212
2213
    /**
     * Tells whether the "finfo" class exists in the running PHP environment.
     *
     * @psalm-pure
     *
     * @return bool
     *              <strong>true</strong> if the class is available, <strong>false</strong> otherwise
     */
    public static function finfo_loaded(): bool
    {
        $finfo_class_exists = \class_exists('finfo');

        return $finfo_class_exists;
    }
2225
2226
    /**
     * Extracts the leading $n characters of a string.
     *
     * An empty input or a non-positive $n yields an empty string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>How many characters to take from the start.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function first_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to extract: bail out before touching any string function.
        if ($n <= 0 || $str === '') {
            return '';
        }

        // UTF-8 goes straight to mb_substr(); every other charset is
        // delegated to self::substr() together with the requested encoding.
        $prefix = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $prefix;
    }
2252
2253
    /**
     * Check that a string is not longer than the given number of characters.
     *
     * The length is obtained from self::strlen() and compared with "<=",
     * so a string whose length equals $box_size still fits.
     *
     * EXAMPLE: <code>
     * UTF8::fits_inside('κόσμε', 4); // false (5 characters)
     * UTF8::fits_inside('κόσμε', 6); // true
     * </code>
     *
     * @param string $str      the original string to be checked
     * @param int    $box_size the size in number of chars to be checked against string
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>TRUE</strong> if string is less than or equal to $box_size, <strong>FALSE</strong> otherwise.</p>
     */
    public static function fits_inside(string $str, int $box_size): bool
    {
        $char_count = (int) self::strlen($str);

        return $char_count <= $box_size;
    }
2270
2271
    /**
     * Repair simple cases of broken UTF-8 by plain search-and-replace.
     *
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
     *
     * EXAMPLE: <code>UTF8::fix_simple_utf8('DÃ¼sseldorf'); // 'Düsseldorf'</code>
     *
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * The replacement table is loaded once via self::getData('utf8_fix') and
     * then kept in function-static caches for all later calls.
     *
     * @param string $str <p>The input string</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $SEARCH_CACHE = null;

        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $REPLACE_CACHE = null;

        // First call only: populate both caches from the shared fix table.
        if ($SEARCH_CACHE === null) {
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            // keys are the broken sequences, values their replacements
            $REPLACE_CACHE = self::$BROKEN_UTF8_FIX;
            $SEARCH_CACHE = \array_keys(self::$BROKEN_UTF8_FIX);
        }

        \assert(\is_array($REPLACE_CACHE));

        return \str_replace($SEARCH_CACHE, $REPLACE_CACHE, $str);
    }
2321
2322
    /**
     * Undo double (or multiple) UTF-8 encoding.
     *
     * A string is passed through self::utf8_decode() followed by
     * self::to_utf8() again and again until a pass no longer changes it.
     * Arrays are processed element by element (recursively), keeping their keys.
     *
     * EXAMPLE: <code>UTF8::fix_utf8('FÃ©dÃ©ration'); // 'Fédération'</code>
     *
     * @param string|string[] $str you can use a string or an array of strings
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         Will return the fixed input-"array" or
     *                         the fixed input-"string"
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function fix_utf8($str)
    {
        if (\is_array($str)) {
            // Fix every element in place; nested arrays recurse.
            foreach ($str as $key => $value) {
                $str[$key] = self::fix_utf8($value);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $current = (string) $str;
        $previous = '';

        // Repeat the decode/encode round trip until it reaches a fixed point.
        // An empty string never enters the loop.
        while ($previous !== $current) {
            $previous = $current;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $current = self::to_utf8(
                self::utf8_decode($current, true)
            );
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }
2368
2369
    /**
     * Get the writing direction of a specific character.
     *
     * When self::$SUPPORT['intlChar'] is true the answer comes from
     * IntlChar::charDirection(); if that value is in neither of the two
     * known lists (or "intlChar" is not supported) the code point is looked
     * up in a built-in table of right-to-left code points instead.
     *
     * EXAMPLE: <code>UTF8::getCharDirection('ا'); // 'RTL'</code>
     *
     * @param string $char
     *
     * @psalm-pure
     *
     * @return string
     *                <p>'RTL' or 'LTR'.</p>
     */
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_direction = \IntlChar::charDirection($char);

            // direction values from "IntlChar"-Class
            $ltr_values = [0, 11, 12, 20];
            $rtl_values = [1, 13, 14, 15, 21];

            if (\in_array($intl_direction, $ltr_values, true)) {
                return 'LTR';
            }

            if (\in_array($intl_direction, $rtl_values, true)) {
                return 'RTL';
            }

            // any other value: fall through to the table lookup below
        }

        $code_point = static::chr_to_decimal($char);

        // Fast path: everything outside the span covered by the table is LTR.
        if (!($code_point >= 0x5be && $code_point <= 0x10b7f)) {
            return 'LTR';
        }

        // Single right-to-left code points (matched strictly).
        $rtl_singles = [
            0x5be, 0x5c0, 0x5c3, 0x5c6,
            0x608, 0x60b, 0x60d, 0x61b,
            0x710, 0x7b1, 0x7fa,
            0x81a, 0x824, 0x828, 0x85e,
            0x200f,
            0xfb1d, 0xfb3e,
            0x10808, 0x1083c, 0x1093f, 0x10a00,
        ];

        if (\in_array($code_point, $rtl_singles, true)) {
            return 'RTL';
        }

        // Inclusive [first, last] ranges of right-to-left code points.
        $rtl_ranges = [
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            [0x800, 0x815],
            [0x830, 0x83e],
            [0x840, 0x858],
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            [0x10b78, 0x10b7f],
        ];

        foreach ($rtl_ranges as $range) {
            if ($code_point >= $range[0] && $code_point <= $range[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2487
2488
    /**
     * Report which php features / extensions this class detected.
     *
     * Asking for a specific key also makes sure the transliterator list is
     * loaded and mirrored into the support array under
     * "intl__transliterator_list_ids" (kept for older callers).
     *
     * @param string|null $key
     *
     * @psalm-pure
     *
     * @return mixed
     *               Return the full support-"array", if $key === null<br>
     *               return bool-value, if $key is used and available<br>
     *               otherwise return <strong>null</strong>
     */
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            // lazy-load the transliterator ids on first keyed access
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            // compatibility fix for old versions
            self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

            return self::$SUPPORT[$key] ?? null;
        }

        return self::$SUPPORT;
    }
2514
2515
    /**
     * Guess the file type of binary data from its first two bytes.
     *
     * Warning: this method only works for some file-types (png, jpg)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * Recognised signatures:
     * - 0xFF 0xD8 => jpg / image/jpeg
     * - 0x89 0x50 => png / image/png
     *
     * Anything else, including input shorter than two bytes, yields $fallback.
     *
     * @see https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
     *
     * @param string $str
     * @param array  $fallback <p>with this keys: 'ext', 'mime', 'type'</p>
     *
     * @psalm-pure
     *
     * @return null[]|string[]
     *                         <p>with this keys: 'ext', 'mime', 'type'</p>
     *
     * @phpstan-param array{ext: null|string, mime: null|string, type: null|string} $fallback
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        // We need at least two bytes to compare against a signature.
        if (!isset($str[1])) {
            return $fallback;
        }

        $signature = $str[0] . $str[1];

        // WARNING: do not add too simple comparisons, because of false-positive results:
        // pdf, exe, midi, zip, rar, gif, tiff, bmp, ... start with plain ASCII letters.
        $known_signatures = [
            "\xFF\xD8" => [
                'ext'  => 'jpg',
                'mime' => 'image/jpeg',
                'type' => 'binary',
            ],
            "\x89\x50" => [
                'ext'  => 'png',
                'mime' => 'image/png',
                'type' => 'binary',
            ],
        ];

        return $known_signatures[$signature] ?? $fallback;
    }
2596
2597
    /**
     * Build a random string by drawing characters from a pool.
     *
     * Each position is chosen with random_int(); if that throws, mt_rand()
     * is used for that draw instead. An empty pool yields an empty string,
     * and so does a $length of zero or less.
     *
     * @param int    $length         <p>Length of the random string.</p>
     * @param string $possible_chars [optional] <p>Characters string for the random selection.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function get_random_string(
        int $length,
        string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        string $encoding = 'UTF-8'
    ): string {
        // Decided on the raw argument, before any normalisation: only the
        // literal 'UTF-8' takes the direct mb_* route.
        $use_native_mb = $encoding === 'UTF-8';

        if (!$use_native_mb) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $pool_size = $use_native_mb
            ? (int) \mb_strlen($possible_chars)
            : (int) self::strlen($possible_chars, $encoding);

        if ($pool_size === 0) {
            return '';
        }

        $last_index = $pool_size - 1;
        $result = '';
        $picked = 0;

        //
        // add random chars
        //

        while ($picked < $length) {
            try {
                $position = \random_int(0, $last_index);
            } catch (\Exception $e) {
                /** @noinspection RandomApiMigrationInspection */
                $position = \mt_rand(0, $last_index);
            }

            $char = $use_native_mb
                ? \mb_substr($possible_chars, $position, 1)
                : self::substr($possible_chars, $position, 1, $encoding);

            // a failed extraction does not count towards the requested length
            if ($char !== false) {
                $result .= $char;
                ++$picked;
            }
        }

        return $result;
    }
2661
2662
    /**
     * Generate a unique identifier string.
     *
     * A random number, the session id, the REMOTE_ADDR / SERVER_ADDR server
     * values (when set) and the caller's extra entropy are concatenated and
     * used as prefix for uniqid(). With $use_md5 the result is additionally
     * hashed together with that prefix.
     *
     * @param int|string $extra_entropy [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $use_md5       [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     */
    public static function get_unique_string($extra_entropy = '', bool $use_md5 = true): string
    {
        $upper_bound = \mt_getrandmax();

        try {
            $random_part = \random_int(0, $upper_bound);
        } catch (\Exception $e) {
            /** @noinspection RandomApiMigrationInspection */
            $random_part = \mt_rand(0, $upper_bound);
        }

        $seed = $random_part
                . \session_id()
                . ($_SERVER['REMOTE_ADDR'] ?? '')
                . ($_SERVER['SERVER_ADDR'] ?? '')
                . $extra_entropy;

        $unique = \uniqid($seed, true);

        return $use_md5 ? \md5($unique . $seed) : $unique;
    }
2691
2692
    /**
     * alias for "UTF8::string_has_bom()"
     *
     * Simply forwards the string and returns whatever the target returns.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::string_has_bom()
     * @deprecated <p>please use "UTF8::string_has_bom()"</p>
     */
    public static function hasBom(string $str): bool
    {
        $has_bom = self::string_has_bom($str);

        return $has_bom;
    }
2708
2709
    /**
     * Check whether the string contains a lower case character.
     *
     * Uses mb_ereg_match() when mbstring is flagged as supported, otherwise
     * the same pattern is handed to self::str_matches_pattern().
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains a lower case character.</p>
     */
    public static function has_lowercase(string $str): bool
    {
        $pattern = '.*[[:lower:]]';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2728
2729
    /**
     * Check whether the string contains a whitespace character.
     *
     * Uses mb_ereg_match() when mbstring is flagged as supported, otherwise
     * the same pattern is handed to self::str_matches_pattern().
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains whitespace.</p>
     */
    public static function has_whitespace(string $str): bool
    {
        $pattern = '.*[[:space:]]';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2748
2749
    /**
     * Check whether the string contains an upper case character.
     *
     * Uses mb_ereg_match() when mbstring is flagged as supported, otherwise
     * the same pattern is handed to self::str_matches_pattern().
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool whether or not the string contains an upper case character
     */
    public static function has_uppercase(string $str): bool
    {
        $pattern = '.*[[:upper:]]';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2767
2768
    /**
     * Turn a hexadecimal code point notation into its character.
     *
     * The value is parsed with hexdec() and the resulting integer is passed
     * to UTF8::decimal_to_chr().
     *
     * INFO: opposite to UTF8::chr_to_hex()
     *
     * EXAMPLE: <code>UTF8::hex_to_chr('U+00a7'); // '§'</code>
     *
     * @param string $hexdec <p>The hexadecimal value.</p>
     *
     * @psalm-pure
     *
     * @return false|string one single UTF-8 character
     */
    public static function hex_to_chr(string $hexdec)
    {
        // hexdec() skips characters that are not hex digits (such as the "U+"
        // prefix); the notice it may raise for them is silenced on purpose.
        /** @noinspection PhpUsageOfSilenceOperatorInspection - Invalid characters passed for attempted conversion, these have been ignored */
        $code_point = (int) @\hexdec($hexdec);

        return self::decimal_to_chr($code_point);
    }
2786
2787
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * Accepted notations are 4 to 6 hexadecimal digits, optionally prefixed
     * with "U+" or a literal "\u". Everything else, including input that
     * contains letters outside a-f / A-F, is rejected with false.
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * EXAMPLE: <code>UTF8::hex_to_int('U+00f1'); // 241</code>
     *
     * @param string $hexdec <p>The hexadecimal code point representation.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The code point, or false on failure.</p>
     */
    public static function hex_to_int($hexdec)
    {
        // init
        $hexdec = (string) $hexdec;

        if ($hexdec === '') {
            return false;
        }

        // Only real hex digits may follow the optional prefix. A wider class
        // such as [a-zA-Z0-9] would let "zzzz" through and intval() would
        // then silently turn it into 0 (or a partial value).
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexdec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2816
2817
    /**
     * alias for "UTF8::html_entity_decode()"
     *
     * All three arguments are forwarded unchanged.
     *
     * @param string $str
     * @param int    $flags
     * @param string $encoding
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::html_entity_decode()
     * @deprecated <p>please use "UTF8::html_entity_decode()"</p>
     */
    public static function html_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2838
2839
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * With mbstring available the work is done by mb_encode_numericentity();
     * otherwise (or if that call does not return a string) every character is
     * encoded individually via self::single_chr_html_encode().
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * EXAMPLE: <code>UTF8::html_encode('中文空白'); // '&#20013;&#25991;&#31354;&#30333;'</code>
     *
     * @param string $str              <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keep_ascii_chars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(
        string $str,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // Keeping ASCII means the encoded range starts above 0x7f.
            $start_code = $keep_ascii_chars ? 0x80 : 0x00;

            // A convmap entry is exactly [first code point, last code point, offset, mask].
            // The element count must be a multiple of four: PHP 8 rejects
            // anything else with a ValueError, older versions just ignored
            // a surplus trailing element.
            $convmap = [$start_code, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                /** @var false|string|null $return - needed for PhpStan (stubs error) */
                $return = \mb_encode_numericentity($str, $convmap);
                if ($return !== null && $return !== false) {
                    return $return;
                }
            }

            /** @var false|string|null $return - needed for PhpStan (stubs error) */
            $return = \mb_encode_numericentity($str, $convmap, $encoding);
            if ($return !== null && $return !== false) {
                return $return;
            }
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keep_ascii_chars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2910
2911
    /**
     * UTF-8 version of html_entity_decode()
     *
     * The reason we are not using html_entity_decode() by itself is because
     * while it is not technically correct to leave out the semicolon
     * at the end of an entity most browsers will still interpret the entity
     * correctly. html_entity_decode() does not convert entities without
     * semicolons, so numeric entities that lack one get it appended here
     * before decoding.
     *
     * Decoding is repeated until the string stops changing, so nested
     * encodings such as "&amp;lt;" are resolved completely.
     *
     * INFO: opposite to UTF8::html_encode()
     *
     * EXAMPLE: <code>UTF8::html_entity_decode('&#20013;&#25991;&#31354;&#30333;'); // '中文空白'</code>
     *
     * @see http://php.net/manual/en/function.html-entity-decode.php
     *
     * @param string $str      <p>
     *                         The input string.
     *                         </p>
     * @param int    $flags    [optional] <p>
     *                         A bitmask of one or more of the following flags, which specify how to handle quotes
     *                         and which document type to use. If omitted (null), ENT_QUOTES | ENT_HTML5 is used.
     *                         <ul>
     *                         <li><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes alone.</li>
     *                         <li><b>ENT_QUOTES</b>: Will convert both double and single quotes.</li>
     *                         <li><b>ENT_NOQUOTES</b>: Will leave both double and single quotes unconverted.</li>
     *                         <li><b>ENT_HTML401</b>: Handle code as HTML 4.01.</li>
     *                         <li><b>ENT_XML1</b>: Handle code as XML 1.</li>
     *                         <li><b>ENT_XHTML</b>: Handle code as XHTML.</li>
     *                         <li><b>ENT_HTML5</b>: Handle code as HTML 5.</li>
     *                         </ul>
     *                         </p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string the decoded string
     */
    public static function html_entity_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // Shorter than four bytes (examples: &; || &x;) or no "&" at all:
        // there is nothing that could be decoded.
        if (!isset($str[3]) || \strpos($str, '&') === false) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        $handled_without_mbstring = \in_array(
            $encoding,
            ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'],
            true
        );

        if (!$handled_without_mbstring && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        while (true) {
            $before_pass = $str;

            if (\strpos($str, '&') !== false) {
                if (\strpos($str, '&#') !== false) {
                    // decode also numeric & UTF16 two byte entities:
                    // append the missing ";" so the native decoder accepts them
                    $str = (string) \preg_replace(
                        '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                        '$1;',
                        $str
                    );
                }

                $str = \html_entity_decode(
                    $str,
                    $flags,
                    $encoding
                );
            }

            // a pass without any change means everything is decoded
            if ($before_pass === $str) {
                break;
            }
        }

        return $str;
    }
3043
3044
    /**
     * Create an escaped html version of the string via "UTF8::htmlspecialchars()".
     *
     * The call uses the flags ENT_QUOTES | ENT_SUBSTITUTE.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }
3062
3063
    /**
     * Remove empty html-tags: an opening tag that is directly followed by a
     * closing tag, with nothing but optional whitespace in between.
     *
     * The replacement is done in a single pass, so a tag that only becomes
     * empty after the removal of its empty child is kept.
     *
     * EXAMPLE: <code>UTF8::html_stripe_empty_tags('<pre><tag></tag></pre>'); // '<pre></pre>'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string without empty tags, or the unchanged input if the regex engine fails.</p>
     */
    public static function html_stripe_empty_tags(string $str): string
    {
        $stripped = \preg_replace(
            '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u',
            '',
            $str
        );

        // preg_replace() returns null on a PCRE error (e.g. invalid UTF-8 together with
        // the "u" modifier); return the untouched input instead of an empty string.
        return $stripped ?? $str;
    }
3082
3083
    /**
3084
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities().
3085
     *
3086
     * EXAMPLE: <code>UTF8::htmlentities('<白-öäü>'); // '&lt;&#30333;-&ouml;&auml;&uuml;&gt;'</code>
3087
     *
3088
     * @see http://php.net/manual/en/function.htmlentities.php
3089
     *
3090
     * @param string $str           <p>
3091
     *                              The input string.
3092
     *                              </p>
3093
     * @param int    $flags         [optional] <p>
3094
     *                              A bitmask of one or more of the following flags, which specify how to handle
3095
     *                              quotes, invalid code unit sequences and the used document type. The default is
3096
     *                              ENT_COMPAT | ENT_HTML401.
3097
     *                              <table>
3098
     *                              Available <i>flags</i> constants
3099
     *                              <tr valign="top">
3100
     *                              <td>Constant Name</td>
3101
     *                              <td>Description</td>
3102
     *                              </tr>
3103
     *                              <tr valign="top">
3104
     *                              <td><b>ENT_COMPAT</b></td>
3105
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3106
     *                              </tr>
3107
     *                              <tr valign="top">
3108
     *                              <td><b>ENT_QUOTES</b></td>
3109
     *                              <td>Will convert both double and single quotes.</td>
3110
     *                              </tr>
3111
     *                              <tr valign="top">
3112
     *                              <td><b>ENT_NOQUOTES</b></td>
3113
     *                              <td>Will leave both double and single quotes unconverted.</td>
3114
     *                              </tr>
3115
     *                              <tr valign="top">
3116
     *                              <td><b>ENT_IGNORE</b></td>
3117
     *                              <td>
3118
     *                              Silently discard invalid code unit sequences instead of returning
3119
     *                              an empty string. Using this flag is discouraged as it
3120
     *                              may have security implications.
3121
     *                              </td>
3122
     *                              </tr>
3123
     *                              <tr valign="top">
3124
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3125
     *                              <td>
3126
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3127
     *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty
3128
     *                              string.
3129
     *                              </td>
3130
     *                              </tr>
3131
     *                              <tr valign="top">
3132
     *                              <td><b>ENT_DISALLOWED</b></td>
3133
     *                              <td>
3134
     *                              Replace invalid code points for the given document type with a
3135
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
3136
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3137
     *                              instance, to ensure the well-formedness of XML documents with
3138
     *                              embedded external content.
3139
     *                              </td>
3140
     *                              </tr>
3141
     *                              <tr valign="top">
3142
     *                              <td><b>ENT_HTML401</b></td>
3143
     *                              <td>
3144
     *                              Handle code as HTML 4.01.
3145
     *                              </td>
3146
     *                              </tr>
3147
     *                              <tr valign="top">
3148
     *                              <td><b>ENT_XML1</b></td>
3149
     *                              <td>
3150
     *                              Handle code as XML 1.
3151
     *                              </td>
3152
     *                              </tr>
3153
     *                              <tr valign="top">
3154
     *                              <td><b>ENT_XHTML</b></td>
3155
     *                              <td>
3156
     *                              Handle code as XHTML.
3157
     *                              </td>
3158
     *                              </tr>
3159
     *                              <tr valign="top">
3160
     *                              <td><b>ENT_HTML5</b></td>
3161
     *                              <td>
3162
     *                              Handle code as HTML 5.
3163
     *                              </td>
3164
     *                              </tr>
3165
     *                              </table>
3166
     *                              </p>
3167
     * @param string $encoding      [optional] <p>
3168
     *                              Like <b>htmlspecialchars</b>,
3169
     *                              <b>htmlentities</b> takes an optional third argument
3170
     *                              <i>encoding</i> which defines encoding used in
3171
     *                              conversion.
3172
     *                              Although this argument is technically optional, you are highly
3173
     *                              encouraged to specify the correct value for your code.
3174
     *                              </p>
3175
     * @param bool   $double_encode [optional] <p>
3176
     *                              When <i>double_encode</i> is turned off PHP will not
3177
     *                              encode existing html entities. The default is to convert everything.
3178
     *                              </p>
3179
     *
3180
     * @psalm-pure
3181
     *
3182
     * @return string
3183
     *                <p>
3184
     *                The encoded string.
3185
     *                <br><br>
3186
     *                If the input <i>string</i> contains an invalid code unit
3187
     *                sequence within the given <i>encoding</i> an empty string
3188
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3189
     *                <b>ENT_SUBSTITUTE</b> flags are set.
3190
     *                </p>
3191
     */
3192 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        $needs_normalization = $encoding !== 'UTF-8' && $encoding !== 'CP850';
        if ($needs_normalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        /**
         * The native htmlentities() leaves backslashes alone, because they are mostly
         * used for escaping characters before inserting them into a database. That
         * escaping is not wanted here, so every backslash is replaced by its
         * html entity equivalent.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace('\\', '&#92;', $encoded);

        // final pass through UTF8::html_encode() (second argument: true)
        return self::html_encode($encoded, true, $encoding);
    }
3221
3222
    /**
3223
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
3224
     *
3225
     * INFO: Take a look at "UTF8::htmlentities()"
3226
     *
3227
     * EXAMPLE: <code>UTF8::htmlspecialchars('<白-öäü>'); // '&lt;白-öäü&gt;'</code>
3228
     *
3229
     * @see http://php.net/manual/en/function.htmlspecialchars.php
3230
     *
3231
     * @param string $str           <p>
3232
     *                              The string being converted.
3233
     *                              </p>
3234
     * @param int    $flags         [optional] <p>
3235
     *                              A bitmask of one or more of the following flags, which specify how to handle
3236
     *                              quotes, invalid code unit sequences and the used document type. The default is
3237
     *                              ENT_COMPAT | ENT_HTML401.
3238
     *                              <table>
3239
     *                              Available <i>flags</i> constants
3240
     *                              <tr valign="top">
3241
     *                              <td>Constant Name</td>
3242
     *                              <td>Description</td>
3243
     *                              </tr>
3244
     *                              <tr valign="top">
3245
     *                              <td><b>ENT_COMPAT</b></td>
3246
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3247
     *                              </tr>
3248
     *                              <tr valign="top">
3249
     *                              <td><b>ENT_QUOTES</b></td>
3250
     *                              <td>Will convert both double and single quotes.</td>
3251
     *                              </tr>
3252
     *                              <tr valign="top">
3253
     *                              <td><b>ENT_NOQUOTES</b></td>
3254
     *                              <td>Will leave both double and single quotes unconverted.</td>
3255
     *                              </tr>
3256
     *                              <tr valign="top">
3257
     *                              <td><b>ENT_IGNORE</b></td>
3258
     *                              <td>
3259
     *                              Silently discard invalid code unit sequences instead of returning
3260
     *                              an empty string. Using this flag is discouraged as it
3261
     *                              may have security implications.
3262
     *                              </td>
3263
     *                              </tr>
3264
     *                              <tr valign="top">
3265
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3266
     *                              <td>
3267
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3268
     *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty
3269
     *                              string.
3270
     *                              </td>
3271
     *                              </tr>
3272
     *                              <tr valign="top">
3273
     *                              <td><b>ENT_DISALLOWED</b></td>
3274
     *                              <td>
3275
     *                              Replace invalid code points for the given document type with a
3276
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
3277
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3278
     *                              instance, to ensure the well-formedness of XML documents with
3279
     *                              embedded external content.
3280
     *                              </td>
3281
     *                              </tr>
3282
     *                              <tr valign="top">
3283
     *                              <td><b>ENT_HTML401</b></td>
3284
     *                              <td>
3285
     *                              Handle code as HTML 4.01.
3286
     *                              </td>
3287
     *                              </tr>
3288
     *                              <tr valign="top">
3289
     *                              <td><b>ENT_XML1</b></td>
3290
     *                              <td>
3291
     *                              Handle code as XML 1.
3292
     *                              </td>
3293
     *                              </tr>
3294
     *                              <tr valign="top">
3295
     *                              <td><b>ENT_XHTML</b></td>
3296
     *                              <td>
3297
     *                              Handle code as XHTML.
3298
     *                              </td>
3299
     *                              </tr>
3300
     *                              <tr valign="top">
3301
     *                              <td><b>ENT_HTML5</b></td>
3302
     *                              <td>
3303
     *                              Handle code as HTML 5.
3304
     *                              </td>
3305
     *                              </tr>
3306
     *                              </table>
3307
     *                              </p>
3308
     * @param string $encoding      [optional] <p>
3309
     *                              Defines encoding used in conversion.
3310
     *                              </p>
3311
     *                              <p>
3312
     *                              For the purposes of this function, the encodings
3313
     *                              ISO-8859-1, ISO-8859-15,
3314
     *                              UTF-8, cp866,
3315
     *                              cp1251, cp1252, and
3316
     *                              KOI8-R are effectively equivalent, provided the
3317
     *                              <i>string</i> itself is valid for the encoding, as
3318
     *                              the characters affected by <b>htmlspecialchars</b> occupy
3319
     *                              the same positions in all of these encodings.
3320
     *                              </p>
3321
     * @param bool   $double_encode [optional] <p>
3322
     *                              When <i>double_encode</i> is turned off PHP will not
3323
     *                              encode existing html entities, the default is to convert everything.
3324
     *                              </p>
3325
     *
3326
     * @psalm-pure
3327
     *
3328
     * @return string
     *                <p>
     *                The converted string.
     *                <br><br>
     *                If the input <i>string</i> contains an invalid code unit
     *                sequence within the given <i>encoding</i> an empty string
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
     *                <b>ENT_SUBSTITUTE</b> flags are set.
     *                </p>
3335
     */
3336 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are passed through as-is, everything else gets normalized first
        $needs_normalization = $encoding !== 'UTF-8' && $encoding !== 'CP850';
        if ($needs_normalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        return \htmlspecialchars($str, $flags, $encoding, $double_encode);
    }
3353
3354
    /**
     * Checks whether the "iconv" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function iconv_loaded(): bool
    {
        $is_loaded = \extension_loaded('iconv');

        return $is_loaded;
    }
3366
3367
    /**
     * alias for "UTF8::decimal_to_chr()"
     *
     * @param int|string $int <p>The value that is handed over to "UTF8::decimal_to_chr()".</p>
     *
     * @psalm-param int|numeric-string $int
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::decimal_to_chr()
     * @deprecated <p>please use "UTF8::decimal_to_chr()"</p>
     */
    public static function int_to_chr($int): string
    {
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
3385
3386
    /**
     * Converts Integer to hexadecimal U+xxxx code point representation.
     *
     * INFO: opposite to UTF8::hex_to_int()
     *
     * EXAMPLE: <code>UTF8::int_to_hex(241); // 'U+00f1'</code>
     *
     * @param int    $int    <p>The integer to be converted to hexadecimal code point.</p>
     * @param string $prefix [optional] <p>The string that is put in front of the hex digits.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The prefix followed by the lowercase hex digits, left-padded with "0" to at least four digits.</p>
     */
    public static function int_to_hex(int $int, string $prefix = 'U+'): string
    {
        // values shorter than four hex digits are zero-padded on the left, longer ones are kept as they are
        $hex_digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $prefix . $hex_digits;
    }
3408
3409
    /**
     * Checks whether the "IntlChar" class is available on the server.
     *
     * @psalm-pure
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function intlChar_loaded(): bool
    {
        $is_available = \class_exists('IntlChar');

        return $is_available;
    }
3421
3422
    /**
     * Checks whether the "intl" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function intl_loaded(): bool
    {
        $is_loaded = \extension_loaded('intl');

        return $is_loaded;
    }
3434
3435
    /**
     * alias for "UTF8::is_ascii()"
     *
     * @param string $str <p>The string to check.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::is_ascii()
     * @deprecated <p>please use "UTF8::is_ascii()"</p>
     */
    public static function isAscii(string $str): bool
    {
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
3451
3452
    /**
     * alias for "UTF8::is_base64()"
     *
     * @param string|null $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::is_base64()
     * @deprecated <p>please use "UTF8::is_base64()"</p>
     */
    public static function isBase64($str): bool
    {
        $is_base64 = self::is_base64($str);

        return $is_base64;
    }
3468
3469
    /**
     * alias for "UTF8::is_binary()"
     *
     * @param int|string $str    <p>The input that is handed over to "UTF8::is_binary()".</p>
     * @param bool       $strict <p>Passed through to "UTF8::is_binary()".</p>
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::is_binary()
     * @deprecated <p>please use "UTF8::is_binary()"</p>
     */
    public static function isBinary($str, bool $strict = false): bool
    {
        $is_binary = self::is_binary($str, $strict);

        return $is_binary;
    }
3486
3487
    /**
     * alias for "UTF8::is_bom()"
     *
     * @param string $utf8_chr <p>The string that is compared against the known "Byte Order Marks".</p>
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::is_bom()
     * @deprecated <p>please use "UTF8::is_bom()"</p>
     */
    public static function isBom(string $utf8_chr): bool
    {
        $is_bom = self::is_bom($utf8_chr);

        return $is_bom;
    }
3503
3504
    /**
     * alias for "UTF8::is_html()"
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::is_html()
     * @deprecated <p>please use "UTF8::is_html()"</p>
     */
    public static function isHtml(string $str): bool
    {
        $is_html = self::is_html($str);

        return $is_html;
    }
3520
3521
    /**
     * alias for "UTF8::is_json()"
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *
     * @see        UTF8::is_json()
     * @deprecated <p>please use "UTF8::is_json()"</p>
     */
    public static function isJson(string $str): bool
    {
        $is_json = self::is_json($str);

        return $is_json;
    }
3535
3536
    /**
     * alias for "UTF8::is_utf16()"
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     *
     * @see        UTF8::is_utf16()
     * @deprecated <p>please use "UTF8::is_utf16()"</p>
     */
    public static function isUtf16($str)
    {
        $utf16_type = self::is_utf16($str);

        return $utf16_type;
    }
3555
3556
    /**
     * alias for "UTF8::is_utf32()"
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     *
     * @see        UTF8::is_utf32()
     * @deprecated <p>please use "UTF8::is_utf32()"</p>
     */
    public static function isUtf32($str)
    {
        $utf32_type = self::is_utf32($str);

        return $utf32_type;
    }
3575
3576
    /**
     * alias for "UTF8::is_utf8()"
     *
     * @param string $str    <p>The input string.</p>
     * @param bool   $strict <p>Passed through to "UTF8::is_utf8()".</p>
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::is_utf8()
     * @deprecated <p>please use "UTF8::is_utf8()"</p>
     */
    public static function isUtf8($str, bool $strict = false): bool
    {
        $is_utf8 = self::is_utf8($str, $strict);

        return $is_utf8;
    }
3593
3594
    /**
     * Returns true if the string contains only alphabetic chars, false otherwise.
     *
     * INFO: "mb_ereg_match()" is used when mbstring support is flagged as available,
     * otherwise "UTF8::str_matches_pattern()" is used with the same pattern.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only alphabetic chars.</p>
     */
    public static function is_alpha(string $str): bool
    {
        $pattern = '^[[:alpha:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3613
3614
    /**
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
     *
     * INFO: "mb_ereg_match()" is used when mbstring support is flagged as available,
     * otherwise "UTF8::str_matches_pattern()" is used with the same pattern.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only alphanumeric chars.</p>
     */
    public static function is_alphanumeric(string $str): bool
    {
        $pattern = '^[[:alnum:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3633
3634
    /**
     * Returns true if the string contains only punctuation chars, false otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only punctuation chars.</p>
     */
    public static function is_punctuation(string $str): bool
    {
        $pattern = '^[[:punct:]]*$';

        return self::str_matches_pattern($str, $pattern);
    }
3648
3649
    /**
     * Returns true if the string contains only printable (non-invisible) chars, false otherwise.
     *
     * The string counts as printable when "UTF8::remove_invisible_characters()" leaves it unchanged.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only printable (non-invisible) chars.</p>
     */
    public static function is_printable(string $str): bool
    {
        $without_invisible_chars = self::remove_invisible_characters($str);

        return $without_invisible_chars === $str;
    }
3663
3664
    /**
     * Checks if a string is 7 bit ASCII (delegates to "ASCII::is_ascii()").
     *
     * EXAMPLE: <code>UTF8::is_ascii('白'); // false</code>
     *
     * @param string $str <p>The string to check.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if it is ASCII<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function is_ascii(string $str): bool
    {
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
3683
3684
    /**
     * Returns true if the string is base64 encoded, false otherwise.
     *
     * The input has to survive a strict decode followed by a re-encode without any change.
     *
     * EXAMPLE: <code>UTF8::is_base64('4KSu4KWL4KSo4KS/4KSa'); // true</code>
     *
     * @param string|null $str                   <p>The input string.</p>
     * @param bool        $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is base64 encoded.</p>
     */
    public static function is_base64($str, bool $empty_string_is_valid = false): bool
    {
        // anything that is not a string (e.g. null) can never be base64
        if (!\is_string($str)) {
            return false;
        }

        if ($str === '' && !$empty_string_is_valid) {
            return false;
        }

        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // round trip: rejects input that decodes, but is not in canonical form (e.g. missing padding)
        return \base64_encode($decoded) === $str;
    }
3715
3716
    /**
     * Check if the input is binary... (this is a heuristic and looks like a hack).
     *
     * The checks run in this order: a string of only "0" and "1" chars, the file type
     * reported by "UTF8::get_file_type()", the share of null bytes (more than 25%) and,
     * only in strict mode, the mime encoding detected by "finfo".
     *
     * EXAMPLE: <code>UTF8::is_binary(01); // true</code>
     *
     * @param int|string $input  <p>The input, it is cast to a string before checking.</p>
     * @param bool       $strict <p>Also ask "finfo" for the mime encoding of the input.</p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if $strict is used and "ext-fileinfo" is flagged as not available
     *
     * @return bool
     */
    public static function is_binary($input, bool $strict = false): bool
    {
        $input = (string) $input;
        if ($input === '') {
            return false;
        }

        // e.g. "0101"
        if (\preg_match('~^[01]+$~', $input)) {
            return true;
        }

        $file_type = self::get_file_type($input);
        if ($file_type['type'] === 'binary') {
            return true;
        }

        // more than a quarter of null bytes is treated as binary
        $null_byte_ratio = \substr_count($input, "\x0") / \strlen($input);
        if ($null_byte_ratio > 0.25) {
            return true;
        }

        if (!$strict) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /**
         * @noinspection   PhpComposerExtensionStubsInspection
         * @psalm-suppress ImpureMethodCall - it will return the same result for the same file ...
         */
        $finfo_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

        return $finfo_encoding === 'binary';
    }
3767
3768
    /**
     * Check if the file is binary.
     *
     * Only the first 512 bytes of the file are read and handed over to
     * "UTF8::is_binary()" in strict mode.
     *
     * EXAMPLE: <code>UTF8::is_binary_file('./utf32.txt'); // true</code>
     *
     * @param string $file <p>The path that is passed to "fopen()".</p>
     *
     * @return bool
     *              <p><strong>false</strong> is also returned if the file can't be opened or if nothing could be read.</p>
     */
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (!\is_resource($handle)) {
            return false;
        }

        $chunk = \fread($handle, 512);
        \fclose($handle);

        if ($chunk === '' || $chunk === false) {
            return false;
        }

        return self::is_binary($chunk, true);
    }
3794
3795
    /**
     * Returns true if the string contains only whitespace chars, false otherwise.
     *
     * INFO: "mb_ereg_match()" is used when mbstring support is flagged as available,
     * otherwise "UTF8::str_matches_pattern()" is used with the same pattern.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only whitespace characters.</p>
     */
    public static function is_blank(string $str): bool
    {
        $pattern = '^[[:space:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3814
3815
    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * EXAMPLE: <code>UTF8::is_bom("\xef\xbb\xbf"); // true</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if the $str is Byte Order Mark, <strong>false</strong> otherwise.</p>
     */
    public static function is_bom($str): bool
    {
        // The known BOM strings are the keys of self::$BOM. Only the keys are read,
        // by value: iterating the shared static array by reference is not needed
        // for a read-only lookup. The strict comparison keeps non-string input at "false".
        return \in_array($str, \array_keys(self::$BOM), true);
    }
3840
3841
    /**
     * Determine whether the given value is considered to be empty.
     *
     * A value is considered empty if it equals FALSE when converted to bool
     * (e.g. '', '0', 0, 0.0, [] or null).
     *
     * @param array|float|int|string $str
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is empty().</p>
     */
    public static function is_empty($str): bool
    {
        // the parameter always exists, so empty() boils down to a falsiness check
        return !$str;
    }
3858
3859
    /**
     * Returns true if the string consists of hexadecimal chars only (or is empty), false otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only hexadecimal chars.</p>
     */
    public static function is_hexadecimal(string $str): bool
    {
        $pattern = '^[[:xdigit:]]*$';

        // without mbstring we fall back to the internal pattern matcher
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3878
3879
    /**
     * Check if the string contains any HTML tags.
     *
     * EXAMPLE: <code>UTF8::is_html('<b>lall</b>'); // true</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains html elements.</p>
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // hack for emoji support :/
        $encoded = self::emoji_encode($str);

        $tag_hits = [];
        \preg_match(
            "/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u",
            $encoded,
            $tag_hits
        );

        // any captured tag (opening, closing or self-closing) counts as html
        return \count($tag_hits) !== 0;
    }
3906
3907
    /**
     * Check if $url is a correct url.
     *
     * Only "http://" and "https://" urls are accepted.
     *
     * @param string $url                <p>The url to check.</p>
     * @param bool   $disallow_localhost <p>If true, urls pointing at localhost, 127.0.0.1, ::1 / [::1]
     *                                   or any "*.localhost" host are rejected.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_url(string $url, bool $disallow_localhost = false): bool
    {
        if ($url === '') {
            return false;
        }

        // WARNING: keep this as hack protection
        if (!self::str_istarts_with_any($url, ['http://', 'https://'])) {
            return false;
        }

        // e.g. -> the server itself connect to "https://foo.localhost/phpmyadmin/...
        if ($disallow_localhost) {
            if (self::str_istarts_with_any(
                $url,
                [
                    'http://localhost',
                    'https://localhost',
                    'http://127.0.0.1',
                    'https://127.0.0.1',
                    'http://::1',
                    'https://::1',
                    // IPv6 literals must be written in brackets inside a url, so this is
                    // the form that is accepted by the checks below and must be blocked
                    'http://[::1]',
                    'https://[::1]',
                ]
            )) {
                return false;
            }

            $regex = '/^(?:http(?:s)?:\/\/).*?(?:\.localhost)/iu';
            /** @noinspection BypassedUrlValidationInspection */
            if (\preg_match($regex, $url)) {
                return false;
            }
        }

        // INFO: this is needed for e.g. "http://müller.de/" (internationalized domain names) and non ASCII-parameters
        /** @noinspection SuspiciousAssignmentsInspection - false-positive - https://github.com/kalessil/phpinspectionsea/issues/1500 */
        $regex = '/^(?:http(?:s)?:\\/\\/)(?:[\p{L}0-9][\p{L}0-9_-]*(?:\\.[\p{L}0-9][\p{L}0-9_-]*))(?:\\d+)?(?:\\/\\.*)?/iu';
        /** @noinspection BypassedUrlValidationInspection */
        if (\preg_match($regex, $url)) {
            return true;
        }

        // everything else is left to the native url validation
        /** @noinspection BypassedUrlValidationInspection */
        return \filter_var($url, \FILTER_VALIDATE_URL) !== false;
    }
3962
3963
    /**
     * Try to check if "$str" is a JSON-string.
     *
     * EXAMPLE: <code>UTF8::is_json('{"array":[1,"¥","ä"]}'); // true</code>
     *
     * @param string $str                                    <p>The input string.</p>
     * @param bool   $only_array_or_object_results_are_valid [optional] <p>Only array and objects are valid json
     *                                                       results.</p>
     *
     * @throws \RuntimeException if ext-json is not available
     *
     * @return bool
     *              <p>Whether or not the $str is in JSON format.</p>
     */
    public static function is_json(string $str, bool $only_array_or_object_results_are_valid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // a NULL result is only acceptable when the input itself spells "null"
        if ($decoded === null && \strtoupper($str) !== 'NULL') {
            return false;
        }

        if ($only_array_or_object_results_are_valid) {
            $is_container = \is_object($decoded) || \is_array($decoded);
            if (!$is_container) {
                return false;
            }
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
4003
4004
    /**
     * Returns true if the string consists of lowercase chars only (or is empty), false otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only lowercase chars.</p>
     */
    public static function is_lowercase(string $str): bool
    {
        $pattern = '^[[:lower:]]*$';

        // without mbstring we fall back to the internal pattern matcher
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
4021
4022
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The check works by trying to unserialize the string. Class instantiation is
     * disabled for that attempt, so serialized objects are still detected, but no
     * user-defined class is created (and no magic method is triggered) while checking.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is serialized.</p>
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // "b:0;" is the serialized form of FALSE, which the generic check below
        // cannot tell apart from a failed unserialize() call
        if ($str === 'b:0;') {
            return true;
        }

        // SECURITY: $str may be untrusted input, never allow object instantiation here
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        /** @noinspection UnserializeExploitsInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
4044
4045
    /**
     * Returns true if the string consists of upper case chars only (or is empty), false otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only upper case characters.</p>
     */
    public static function is_uppercase(string $str): bool
    {
        $pattern = '^[[:upper:]]*$';

        // without mbstring we fall back to the internal pattern matcher
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
4065
4066
    /**
     * Check if the string is UTF-16.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf16(file_get_contents('utf-16-le.txt')); // 1
     * //
     * UTF8::is_utf16(file_get_contents('utf-16-be.txt')); // 2
     * //
     * UTF8::is_utf16(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $check_if_string_is_binary <p>If true, strings that do not pass the binary check are
     *                                          rejected right away.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // one score per byte order, evaluated in the order LE -> BE
        $hits = ['UTF-16LE' => 0, 'UTF-16BE' => 0];

        foreach (['UTF-16LE', 'UTF-16BE'] as $candidate) {
            $test = \mb_convert_encoding($str, 'UTF-8', $candidate);
            if (!$test) {
                continue;
            }

            // the conversion must survive a round trip, otherwise the candidate is discarded
            $test2 = \mb_convert_encoding($test, $candidate, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $candidate);
            if ($test3 !== $test) {
                continue;
            }

            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // Only the keys are needed. Iterating the keys of the returned array avoids
            // taking a reference to a function-call result (which has no reference target).
            // NOTE(review): the membership test is kept exactly as before - confirm that
            // the keys of count_chars() and the content of $str_chars are comparable.
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$hits[$candidate];
                }
            }
        }

        if ($hits['UTF-16BE'] === $hits['UTF-16LE']) {
            // no byte order is more plausible than the other
            return false;
        }

        return $hits['UTF-16LE'] > $hits['UTF-16BE'] ? 1 : 2;
    }
4159
4160
    /**
     * Check if the string is UTF-32.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf32(file_get_contents('utf-32-le.txt')); // 1
     * //
     * UTF8::is_utf32(file_get_contents('utf-32-be.txt')); // 2
     * //
     * UTF8::is_utf32(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $check_if_string_is_binary <p>If true, strings that do not pass the binary check are
     *                                          rejected right away.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it's not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // one score per byte order, evaluated in the order LE -> BE
        $hits = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

        foreach (['UTF-32LE', 'UTF-32BE'] as $candidate) {
            $test = \mb_convert_encoding($str, 'UTF-8', $candidate);
            if (!$test) {
                continue;
            }

            // the conversion must survive a round trip, otherwise the candidate is discarded
            $test2 = \mb_convert_encoding($test, $candidate, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $candidate);
            if ($test3 !== $test) {
                continue;
            }

            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // Only the keys are needed. Iterating the keys of the returned array avoids
            // taking a reference to a function-call result (which has no reference target).
            // NOTE(review): the membership test is kept exactly as before - confirm that
            // the keys of count_chars() and the content of $str_chars are comparable.
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$hits[$candidate];
                }
            }
        }

        if ($hits['UTF-32BE'] === $hits['UTF-32LE']) {
            // no byte order is more plausible than the other
            return false;
        }

        return $hits['UTF-32LE'] > $hits['UTF-32BE'] ? 1 : 2;
    }
4253
4254
    /**
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
     *
     * For an array every element is checked (recursively); the first invalid element
     * makes the whole result false. An empty array is reported as valid.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8(['Iñtërnâtiônàlizætiøn', 'foo']); // true
     * //
     * UTF8::is_utf8(["Iñtërnâtiônàlizætiøn\xA0\xA1", 'bar']); // false
     * </code>
     *
     * @param int|string|string[]|null $str    <p>The input to be checked.</p>
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (!\is_array($str)) {
            return self::is_utf8_string((string) $str, $strict);
        }

        // the elements are only read, so they are iterated by value
        // (a by-reference loop would leave a dangling reference behind)
        foreach ($str as $value) {
            if (!self::is_utf8($value, $strict)) {
                return false;
            }
        }

        return true;
    }
4284
4285
    /**
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
     * Decodes a JSON string
     *
     * The input is passed through "UTF8::filter()" before it is handed to the native decoder.
     *
     * EXAMPLE: <code>UTF8::json_decode('[1,"\u00a5","\u00e4"]'); // array(1, '¥', 'ä')</code>
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>
     *                        The <i>json</i> string being decoded.
     *                        </p>
     *                        <p>
     *                        This function only works with UTF-8 encoded strings.
     *                        </p>
     *                        <p>PHP implements a superset of
     *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                        only supports these values when they are nested inside an array or an object.
     *                        </p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>
     *                        User specified recursion depth.
     *                        </p>
     * @param int    $options [optional] <p>
     *                        Bitmask of JSON decode options. Currently only
     *                        <b>JSON_BIGINT_AS_STRING</b>
     *                        is supported (default is to cast large integers as floats)
     *                        </p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if ext-json is not available
     *
     * @return mixed
     *               <p>The value encoded in <i>json</i> in appropriate PHP type. Values true, false and
     *               null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b> and <b>NULL</b> respectively.
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
     *               is deeper than the recursion limit.</p>
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        $filtered_json = self::filter($json);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_decode($filtered_json, $assoc, $depth, $options);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
4339
4340
    /**
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
     * Returns the JSON representation of a value.
     *
     * The value is passed through "UTF8::filter()" before it is handed to the native encoder.
     *
     * EXAMPLE: <code>UTF8::json_encode(array(1, '¥', 'ä')); // '[1,"\u00a5","\u00e4"]'</code>
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>
     *                       The <i>value</i> being encoded. Can be any type except
     *                       a resource.
     *                       </p>
     *                       <p>
     *                       All string data must be UTF-8 encoded.
     *                       </p>
     *                       <p>PHP implements a superset of
     *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                       only supports these values when they are nested inside an array or an object.
     *                       </p>
     * @param int   $options [optional] <p>
     *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
     *                       <b>JSON_HEX_TAG</b>,
     *                       <b>JSON_HEX_AMP</b>,
     *                       <b>JSON_HEX_APOS</b>,
     *                       <b>JSON_NUMERIC_CHECK</b>,
     *                       <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_SLASHES</b>,
     *                       <b>JSON_FORCE_OBJECT</b>,
     *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
     *                       constants is described on
     *                       the JSON constants page.
     *                       </p>
     * @param int   $depth   [optional] <p>
     *                       Set the maximum depth. Must be greater than zero.
     *                       </p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if ext-json is not available
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filtered_value = self::filter($value);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_encode($filtered_value, $options, $depth);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
4393
4394
    /**
     * Checks whether JSON is available on the server.
     *
     * The check looks for the native "json_decode" function.
     *
     * @psalm-pure
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function json_loaded(): bool
    {
        $has_json_decode = \function_exists('json_decode');

        return $has_json_decode;
    }
4406
4407
    /**
     * Makes string's first char lowercase.
     *
     * EXAMPLE: <code>UTF8::lcfirst('ÑTËRNÂTIÔNÀLIZÆTIØN'); // ñTËRNÂTIÔNÀLIZÆTIØN</code>
     *
     * @param string      $str                           <p>The input string</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8') {
            // non-default charset: everything goes through the encoding-aware helpers
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $tail = (string) self::substr($str, 1, null, $encoding);
            $head = self::strtolower(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );

            return $head . $tail;
        }

        $tail = (string) \mb_substr($str, 1);
        $first_char = (string) \mb_substr($str, 0, 1);

        // the native function is enough as long as no language specific handling is requested
        if ($lang === null && !$try_to_keep_the_string_length) {
            return \mb_strtolower($first_char) . $tail;
        }

        $head = self::strtolower(
            $first_char,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        );

        return $head . $tail;
    }
4469
4470
    /**
     * alias for "UTF8::lcfirst()"
     *
     * All arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::lcfirst()
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $result = self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
4501
4502
    /**
     * Lowercase the first char of all words in the string.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string[]    $exceptions                    [optional] <p>Words that are kept as they are.</p>
     * @param string      $char_list                     [optional] <p>Additional chars that belong to words and do
     *                                                   not start a new word.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // compare against '' explicitly: the string "0" is falsy, but it is not empty
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        // the parts are only read, so no by-reference iteration is needed
        foreach ($words as $word) {
            // skip empty parts only, a part like "0" must stay in the result
            if ($word === '' || $word === null || $word === false) {
                continue;
            }

            if (
                !$use_exceptions
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words_str .= self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
            } else {
                $words_str .= $word;
            }
        }

        return $words_str;
    }
4555
4556
    /**
     * Deprecated alias: forwards all arguments unchanged to "UTF8::lcfirst()".
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::lcfirst()
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $result = self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
4587
4588
    /**
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
     *
     * EXAMPLE: <code>UTF8::ltrim(' 中文空白  '); // '中文空白  '</code>
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped; null strips whitespace,
     *                           an empty string strips nothing.</p>
     *
     * @psalm-pure
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty char list would build the invalid character class "^[]+", the regex call
        // would fail and the whole string would be lost. There is nothing to strip, so
        // return the input unchanged (same as native ltrim($str, '')).
        if ($chars === '') {
            return $str;
        }

        $use_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($chars !== null) {
            // mb_ereg patterns have no delimiter; the PCRE fallback is wrapped in "/" by regex_replace().
            /** @noinspection PregQuoteUsageInspection */
            $quoted_chars = $use_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');
            // "{$var}" instead of "${var}": the latter form is deprecated since PHP 8.2.
            $pattern = "^[{$quoted_chars}]+";
        } else {
            $pattern = '^[\\s]+';
        }

        if ($use_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '');
    }
4628
4629
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * An array argument is concatenated into one string before its code points are compared.
     *
     * EXAMPLE: <code>UTF8::max('abc-äöü-中文空白'); // '空'</code>
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @psalm-pure
     *
     * @return string|null the character with the highest code point, or null when no code points were found
     */
    public static function max($arg)
    {
        $haystack = \is_array($arg) ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($haystack);

        return $codepoints === []
            ? null
            : self::chr((int) \max($codepoints));
    }
4655
4656
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * EXAMPLE: <code>UTF8::max_chr_width('Intërnâtiônàlizætiøn'); // 2</code>
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Largest value reported by UTF8::chr_size_list(), or 0 when that list is empty.</p>
     */
    public static function max_chr_width(string $str): int
    {
        $sizes = self::chr_size_list($str);

        return $sizes === [] ? 0 : (int) \max($sizes);
    }
4678
4679
    /**
     * Checks whether the "mbstring" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     */
    public static function mbstring_loaded(): bool
    {
        $is_loaded = \extension_loaded('mbstring');

        return $is_loaded;
    }
4691
4692
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * An array argument is concatenated into one string before its code points are compared.
     *
     * EXAMPLE: <code>UTF8::min('abc-äöü-中文空白'); // '-'</code>
     *
     * @param string|string[] $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>The character with the lowest code point, or null when no code points were found.</p>
     */
    public static function min($arg)
    {
        $haystack = \is_array($arg) ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($haystack);

        return $codepoints === []
            ? null
            : self::chr((int) \min($codepoints));
    }
4719
4720
    /**
     * Deprecated alias: forwards both arguments unchanged to "UTF8::normalize_encoding()".
     *
     * @param mixed $encoding
     * @param mixed $fallback
     *
     * @psalm-pure
     *
     * @return mixed
     *
     * @see        UTF8::normalize_encoding()
     * @deprecated <p>please use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4737
4738
    /**
     * Normalize the encoding-"name" input.
     *
     * Lookup order: falsy input and the string "1" yield $fallback; a handful of
     * frequent names are answered directly; then a per-request static cache, the
     * list in self::$ENCODINGS and finally a table of known aliases are consulted.
     * A name that matches nothing is returned upper-cased.
     *
     * EXAMPLE: <code>UTF8::normalize_encoding('UTF8'); // 'UTF-8'</code>
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @psalm-pure
     *
     * @return mixed|string
     *                      <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)</p>
     *
     * @template TNormalizeEncodingFallback
     * @psalm-param string|TNormalizeEncodingFallback $fallback
     * @psalm-return string|TNormalizeEncodingFallback
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        $encoding = (string) $encoding;

        // '' and '0' are falsy; '1' (like '0') is only a fallback for non "strict_types" usage ...
        if (!$encoding || $encoding === '1') {
            return $fallback;
        }

        // frequent names that are answered without touching the cache
        $well_known = [
            'UTF-8'         => 'UTF-8',
            'UTF8'          => 'UTF-8',
            '8BIT'          => 'CP850',
            'BINARY'        => 'CP850',
            'HTML'          => 'HTML-ENTITIES',
            'HTML-ENTITIES' => 'HTML-ENTITIES',
            'ISO'           => 'ISO-8859-1',
            'ISO-8859-1'    => 'ISO-8859-1',
        ];
        if (isset($well_known[$encoding])) {
            return $well_known[$encoding];
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        // lazy-load the list of encoding names
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // the name is already listed as-is
        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        $normalized = \strtoupper($encoding);
        // alias lookup key: upper-cased name reduced to letters and digits
        $alias_key = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $normalized);

        $equivalences = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        if (!empty($equivalences[$alias_key])) {
            $normalized = $equivalences[$alias_key];
        }

        // cache under the name exactly as the caller spelled it
        $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $normalized;

        return $normalized;
    }
4903
4904
    /**
     * Standardize line endings: every "\r\n", "\r" and "\n" is replaced by $replacer.
     *
     * The replacement is done in a single pass, so text that was just inserted is never
     * replaced again. A sequential str_replace() would turn "\r\n" into "\r\r\n\r\n"
     * when the replacer is "\r\n".
     *
     * @param string          $str      <p>The input string.</p>
     * @param string|string[] $replacer <p>The replacer char e.g. "\n" (Linux) or "\r\n" (Windows). You can also use \PHP_EOL
     *                                  here. An array is applied positionally to "\r\n", "\r", "\n"; missing
     *                                  entries are treated as an empty string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized line ending.</p>
     */
    public static function normalize_line_ending(string $str, $replacer = "\n"): string
    {
        $line_endings = ["\r\n", "\r", "\n"];

        if (\is_array($replacer)) {
            // Same positional pairing as str_replace(): n-th search => n-th replacement.
            $replacements = \array_values($replacer);
            $map = [];
            foreach ($line_endings as $index => $line_ending) {
                $map[$line_ending] = (string) ($replacements[$index] ?? '');
            }
        } else {
            $map = \array_fill_keys($line_endings, (string) $replacer);
        }

        // strtr() with an array tries the longest key first and never rescans replaced text.
        return \strtr($str, $map);
    }
4920
4921
    /**
     * Normalize some MS Word special characters (delegates to ASCII::normalize_msword()).
     *
     * EXAMPLE: <code>UTF8::normalize_msword('„Abcdef…”'); // '"Abcdef..."'</code>
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized characters for commonly used chars in Word documents.</p>
     */
    public static function normalize_msword(string $str): string
    {
        $normalized = ASCII::normalize_msword($str);

        return $normalized;
    }
4937
4938
    /**
     * Normalize the whitespace (delegates to ASCII::normalize_whitespace()).
     *
     * EXAMPLE: <code>UTF8::normalize_whitespace("abc-\xc2\xa0-öäü-\xe2\x80\xaf-\xE2\x80\xAC", true); // "abc-\xc2\xa0-öäü- -"</code>
     *
     * @param string $str                        <p>The string to be normalized.</p>
     * @param bool   $keep_non_breaking_space    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keep_bidi_unicode_controls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                           bidirectional text chars.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized whitespace.</p>
     */
    public static function normalize_whitespace(
        string $str,
        bool $keep_non_breaking_space = false,
        bool $keep_bidi_unicode_controls = false
    ): string {
        $normalized = ASCII::normalize_whitespace($str, $keep_non_breaking_space, $keep_bidi_unicode_controls);

        return $normalized;
    }
4964
4965
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * Results are memoized in a static cache keyed by character and encoding. The code
     * point is resolved from the self::$ORD table first, then via IntlChar (when
     * available) and finally by decoding the leading bytes by hand.
     *
     * INFO: opposite to UTF8::chr()
     *
     * EXAMPLE: <code>UTF8::ord('☃'); // 0x2603</code>
     *
     * @param string $chr      <p>The character of which to calculate code point.<p/>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Unicode code point of the given character,<br>
     *             0 on invalid UTF-8 byte sequence</p>
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,int>
         */
        static $CHAR_CACHE = [];

        $chr = (string) $chr;

        // the two most common names skip the normalization call
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $cache_key = $chr . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        // lazy-load the lookup table
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            return $CHAR_CACHE[$cache_key] = self::$ORD[$chr];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_code = \IntlChar::ord($chr);
            if ($intl_code) {
                return $CHAR_CACHE[$cache_key] = $intl_code;
            }
        }

        //
        // fallback via vanilla php
        //

        // unpack('C*') yields a 1-based list of the (at most four) leading byte values
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        /** @noinspection OffsetOperationsInspection */
        $lead = $bytes ? $bytes[1] : 0;

        /** @noinspection OffsetOperationsInspection */
        if ($lead >= 0xF0 && isset($bytes[4])) {
            // four-byte sequence
            $code = (int) ((($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        } elseif ($lead >= 0xE0 && isset($bytes[3])) {
            // three-byte sequence
            $code = (int) ((($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        } elseif ($lead >= 0xC0 && isset($bytes[2])) {
            // two-byte sequence
            $code = (int) ((($lead - 0xC0) << 6) + $bytes[2] - 0x80);
        } else {
            // single byte (or nothing to decode)
            $code = $lead;
        }

        return $CHAR_CACHE[$cache_key] = $code;
    }
5059
5060
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
     *          if the second parameter is not set!
     *
     * EXAMPLE: <code>
     * UTF8::parse_str('Iñtërnâtiônéàlizætiøn=測試&arr[]=foo+測試&arr[]=ການທົດສອບ', $array);
     * echo $array['Iñtërnâtiônéàlizætiøn']; // '測試'
     * </code>
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $result     <p>The result will be returned into this reference parameter.</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
     */
    public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
    {
        $input = $clean_utf8 ? self::clean($str) : $str;

        if (self::$SUPPORT['mbstring'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - we use the second parameter, so we don't change variables by magic
             */
            \parse_str($input, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($input, $result);

        return $parsed !== false && $result !== [];
    }
5101
5102
    /**
     * Checks if \u modifier is available that enables Unicode support in PCRE.
     *
     * The check runs an empty pattern with the "u" modifier and reports whether it matched.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function pcre_utf8_support(): bool
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $match_result = @\preg_match('//u', '');

        return (bool) $match_result;
    }
5118
5119
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * Both bounds are interpreted the same way: as decimal code points when both consist
     * of digits, as hexadecimal code points when both consist of hex digits (ctype mode
     * only), otherwise each bound is resolved on its own (numeric value in non-ctype
     * mode, else the code point of the given character).
     *
     * EXAMPLE: <code>UTF8::range('κ', 'ζ'); // array('κ', 'ι', 'θ', 'η', 'ζ',)</code>
     *
     * @param int|string $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param int|string $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     * @param bool       $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple
     *                              "is_numeric"</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param float|int  $step      [optional] <p>
     *                              If a step value is given, it will be used as the
     *                              increment between elements in the sequence. step
     *                              should be given as a positive number. If not specified,
     *                              step will default to 1.
     *                              </p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if $step is not a positive number
     * @throws \RuntimeException         if $use_ctype is requested but ext-ctype is missing
     *
     * @return string[]
     */
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            /**
             * @psalm-suppress RedundantConditionGivenDocblockType
             * @psalm-suppress DocblockTypeContradiction
             */
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            /**
             * @psalm-suppress RedundantConditionGivenDocblockType - false-positive from psalm?
             */
            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // ctype_*() must only see strings: integers are interpreted as ASCII codes and
        // non-string arguments are deprecated since PHP 8.1.
        $var1_string = (string) $var1;
        $var2_string = (string) $var2;

        /** @noinspection PhpComposerExtensionStubsInspection */
        $is_digit = $use_ctype && \ctype_digit($var1_string) && \ctype_digit($var2_string);
        /** @noinspection PhpComposerExtensionStubsInspection */
        $is_xdigit = !$is_digit && $use_ctype && \ctype_xdigit($var1_string) && \ctype_xdigit($var2_string);

        if ($is_digit) {
            $start = (int) $var1;
        } elseif ($is_xdigit) {
            $start = (int) self::hex_to_int($var1_string);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            $start = self::ord($var1_string);
        }

        if (!$start) {
            return [];
        }

        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int($var2_string);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord($var2_string);
        }

        if (!$end) {
            return [];
        }

        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
5213
5214
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * Each pass converts to UTF-8, decodes HTML entities, applies rawurldecode() and
     * finally UTF8::fix_simple_utf8(); with $multi_decode the pass is repeated until
     * the string stops changing.
     *
     * EXAMPLE: <code>UTF8::rawurldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest+test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The decoded URL, as a string.</p>
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // nothing that looks encoded: only the simple UTF-8 fix is applied
        $has_encoded_marker = \strpos($str, '&') !== false
                              ||
                              \strpos($str, '%') !== false
                              ||
                              \strpos($str, '+') !== false
                              ||
                              \strpos($str, '\u') !== false;
        if (!$has_encoded_marker) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // one pass in any case; further passes only while $multi_decode is set and the string still changes
        do {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $entities_decoded = self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            );
            $str = self::fix_simple_utf8(\rawurldecode($entities_decoded));
        } while ($multi_decode && $previous !== $str);

        return $str;
    }
5290
5291
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is wrapped in $delimiter and always run with the "u" modifier; the
     * option string "msr" is reduced to "ms" before it is appended.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
     * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        $modifiers = 'u' . ($options === 'msr' ? 'ms' : $options);

        // fallback
        $wrapper = $delimiter ?: '/';

        $regex = $wrapper . $pattern . $wrapper . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
5326
5327
    /**
     * Deprecated alias: forwards the string unchanged to "UTF8::remove_bom()".
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::remove_bom()
     * @deprecated <p>please use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $without_bom = self::remove_bom($str);

        return $without_bom;
    }
5343
5344
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of self::$BOM (BOM bytes => byte length) is checked once, in order,
     * against the start of the string; a matching prefix is cut off before the next
     * entry is checked.
     *
     * EXAMPLE: <code>UTF8::remove_bom("\xEF\xBB\xBFΜπορώ να"); // 'Μπορώ να'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without UTF-BOM.</p>
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            if (\strpos($str, $bom_string) !== 0) {
                continue;
            }

            /** @var false|string $remainder - needed for PhpStan (stubs error) */
            $remainder = \substr($str, $bom_byte_length);
            if ($remainder === false) {
                return '';
            }

            $str = (string) $remainder;
        }

        return $str;
    }
5379
5380
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Every run of directly repeated $what is collapsed to a single occurrence.
     *
     * EXAMPLE: <code>UTF8::remove_duplicates('öäü-κόσμεκόσμε-äöü', 'κόσμε'); // 'öäü-κόσμε-äöü'</code>
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with removed duplicates.</p>
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what)) {
            $what = [$what];
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (!\is_array($what)) {
            return $str;
        }

        foreach ($what as $item) {
            // an empty needle has no repetitions to collapse
            if ($item === '') {
                continue;
            }

            // The replacement is the captured occurrence ('$1'), not $item itself: preg_replace()
            // would interpret sequences such as "$0" or "\1" inside $item as backreferences.
            $collapsed = \preg_replace('/(' . \preg_quote($item, '/') . ')+/u', '$1', $str);

            // null signals a PCRE failure (e.g. invalid UTF-8); keep the string instead of wiping it
            if ($collapsed !== null) {
                $str = $collapsed;
            }
        }

        return $str;
    }
5410
5411
    /**
     * Strips HTML (and PHP) tags from the string via "strip_tags()".
     *
     * @param string $str            <p>The input string.</p>
     * @param string $allowable_tags [optional] <p>Tags which should not be stripped, e.g. '&lt;b&gt;&lt;i&gt;'.
     *                               Default: '' (strip every tag)
     *                               </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without html tags.</p>
     */
    public static function remove_html(string $str, string $allowable_tags = ''): string
    {
        $stripped = \strip_tags($str, $allowable_tags);

        return $stripped;
    }
5428
5429
    /**
     * Remove all breaks [<br> | \r\n | \r | \n] from the string.
     *
     * A "\r\n" pair counts as ONE break, so it is replaced exactly once.
     * "<br>" tags are matched case-insensitively, with or without attributes
     * and with or without a self-closing slash; other tags whose name merely
     * starts with "br" are left alone.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without breaks.</p>
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // "\r\n" is listed first so that a Windows line ending is consumed as a unit.
        return (string) \preg_replace("#\r\n|\r|\n|<br\\b[^>]*>#i", $replacement, $str);
    }
5444
5445
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * EXAMPLE: <code>UTF8::remove_invisible_characters("κόσ\0με"); // 'κόσμε'</code>
     *
     * The work is delegated unchanged to ASCII::remove_invisible_characters().
     *
     * @param string $str         <p>The input string.</p>
     * @param bool   $url_encoded [optional] <p>
     *                            Try to remove url encoded control character.
     *                            WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
     *                            <br>
     *                            Default: false
     *                            </p>
     * @param string $replacement [optional] <p>The replacement character.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without invisible chars.</p>
     */
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = false,
        string $replacement = ''
    ): string {
        $cleaned = ASCII::remove_invisible_characters($str, $url_encoded, $replacement);

        return $cleaned;
    }
5479
5480
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * The prefix check is a byte-wise comparison; an empty $substring never
     * matches and the input is returned unchanged. A prefix of "0" is handled
     * like any other non-empty prefix.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without the prefix $substring.</p>
     */
    public static function remove_left(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // explicit empty-string check: a truthiness test would also reject the prefix "0"
        if ($substring === '' || \strpos($str, $substring) !== 0) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
5517
5518
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * The suffix check is a byte-wise comparison; an empty $substring never
     * matches and the input is returned unchanged. A suffix of "0" is handled
     * like any other non-empty suffix.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string having a $str without the suffix $substring.</p>
     */
    public static function remove_right(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // explicit empty-string check: a truthiness test would also reject the suffix "0"
        if ($substring === '' || \substr($str, -\strlen($substring)) !== $substring) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
5556
5557
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * Case-sensitive replacement uses the native "str_replace()", the
     * case-insensitive variant is delegated to self::str_ireplace().
     *
     * @param string $str            <p>The input string.</p>
     * @param string $search         <p>The needle to search for.</p>
     * @param string $replacement    <p>The string to replace with.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $case_sensitive = true
    ): string {
        return $case_sensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
5582
5583
    /**
     * Replaces all occurrences of every $search element in $str by $replacement.
     *
     * Case-sensitive replacement uses the native "str_replace()", the
     * case-insensitive variant is delegated to self::str_ireplace().
     *
     * @param string       $str            <p>The input string.</p>
     * @param array        $search         <p>The elements to search for.</p>
     * @param array|string $replacement    <p>The string (or list of strings) to replace with.</p>
     * @param bool         $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $case_sensitive = true
    ): string {
        if (!$case_sensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
5608
5609
    /**
     * Replace the diamond question mark (U+FFFD) and invalid-UTF8 chars with the replacement.
     *
     * EXAMPLE: <code>UTF8::replace_diamond_question_mark("中文空白\xEF\xBF\xBD", ''); // '中文空白'</code>
     *
     * @param string $str                        <p>The input string</p>
     * @param string $replacement_char           <p>The replacement character.</p>
     * @param bool   $process_invalid_utf8_chars <p>Convert invalid UTF-8 chars </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without diamond question marks (U+FFFD).</p>
     */
    public static function replace_diamond_question_mark(
        string $str,
        string $replacement_char = '',
        bool $process_invalid_utf8_chars = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($process_invalid_utf8_chars) {
            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            // "mb_substitute_character()" only accepts "none", "long", "entity" or a
            // code point - never an arbitrary string. For an empty replacement invalid
            // sequences are dropped ("none"); otherwise they are first marked with
            // U+FFFD and the final str_replace() swaps that marker for the requested
            // replacement, which may therefore be any string.
            $substitute = $replacement_char === '' ? 'none' : 0xFFFD;

            /**
             * @psalm-suppress ImpureFunctionCall - we will reset the value in the "finally" block
             */
            $save = \mb_substitute_character();

            try {
                \mb_substitute_character($substitute);
                // the polyfill maybe return false, so cast to string
                $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            } finally {
                // always restore the global setting, even if the conversion throws
                \mb_substitute_character($save);
            }
        }

        // "\xEF\xBF\xBD" is the UTF-8 byte sequence of U+FFFD
        return \str_replace("\xEF\xBF\xBD", $replacement_char, $str);
    }
5667
5668
    /**
     * Strip whitespace or other characters from the end of a UTF-8 string.
     *
     * EXAMPLE: <code>UTF8::rtrim('-ABC-中文空白-  '); // '-ABC-中文空白-'</code>
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped; null strips whitespace,
     *                           an empty string strips nothing.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with unwanted characters stripped from the right.</p>
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty character list means "strip nothing" (like the native rtrim());
        // it would otherwise produce the invalid pattern "[]+$".
        if ($chars === '') {
            return $str;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                $pattern = "[{$chars}]+$";
            } else {
                $pattern = '[\\s]+$';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        if ($chars !== null) {
            $chars = \preg_quote($chars, '/');
            $pattern = "[{$chars}]+$";
        } else {
            $pattern = '[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '');
    }
5709
5710
    /**
     * WARNING: Print native UTF-8 support (libs) by default, e.g. for debugging.
     *
     * Builds one "key - value" line per entry of self::$SUPPORT, wrapped in a
     * "<pre>" element. The markup is always returned and additionally echoed
     * when $useEcho is true (so this method is not side-effect free).
     *
     * @param bool $useEcho <p>Echo the generated markup. Default: true</p>
     *
     * @return string
     *                <p>The generated HTML.</p>
     */
    public static function showSupport(bool $useEcho = true)
    {
        $html = '<pre>';
        // read-only iteration: no by-reference loop variable needed
        foreach (self::$SUPPORT as $key => $value) {
            $html .= $key . ' - ' . \print_r($value, true) . "\n<br>";
        }
        $html .= '</pre>';

        if ($useEcho) {
            echo $html;
        }

        return $html;
    }
5737
5738
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * EXAMPLE: <code>UTF8::single_chr_html_encode('κ'); // '&#954;'</code>
     *
     * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The HTML numbered entity for the given character.</p>
     */
    public static function single_chr_html_encode(
        string $char,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($char === '') {
            return '';
        }

        // the ASCII check is only performed when the caller asked to keep ASCII chars
        $keep_as_is = $keep_ascii_chars && ASCII::is_ascii($char);

        return $keep_as_is
            ? $char
            : '&#' . self::ord($char, $encoding) . ';';
    }
5771
5772
    /**
     * Replaces every run of $tab_length spaces with one tab character.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length <p>Number of spaces that represent one tab. Default: 4</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tab_length = 4): string
    {
        $indent = \str_repeat(' ', $tab_length);

        return \str_replace($indent, "\t", $str);
    }
5792
5793
    /**
     * alias for "UTF8::str_split()"
     *
     * @param int|string $str
     * @param int        $length
     * @param bool       $clean_utf8
     *
     * @psalm-pure
     *
     * @return string[]
     *
     * @see        UTF8::str_split()
     * @deprecated <p>please use "UTF8::str_split()"</p>
     */
    public static function split(
        $str,
        int $length = 1,
        bool $clean_utf8 = false
    ): array {
        /** @var string[] $chunks */
        $chunks = self::str_split($str, $length, $clean_utf8);

        return $chunks;
    }
5815
5816
    /**
     * alias for "UTF8::str_starts_with()"
     *
     * @param string $haystack
     * @param string $needle
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::str_starts_with()
     * @deprecated <p>please use "UTF8::str_starts_with()"</p>
     */
    public static function str_begins(string $haystack, string $needle): bool
    {
        $starts_with_needle = self::str_starts_with($haystack, $needle);

        return $starts_with_needle;
    }
5833
5834
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // lower-case the first character, then drop leading dashes / underscores
        $trimmed = \trim($str);
        $str = self::lcfirst($trimmed, $encoding, false, $lang, $try_to_keep_the_string_length);
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // the plain "mb_*" functions are only used when no language-specific handling is requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        /**
         * Upper-case the character that follows a run of separators and drop the separators.
         *
         * @param array $match
         *
         * @psalm-pure
         *
         * @return string
         */
        $upper_after_separator = static function (array $match) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
            if (!isset($match[1])) {
                return '';
            }

            if (!$use_mb_functions) {
                return self::strtoupper($match[1], $encoding, false, $lang, $try_to_keep_the_string_length);
            }

            return $encoding === 'UTF-8'
                ? \mb_strtoupper($match[1])
                : \mb_strtoupper($match[1], $encoding);
        };

        /**
         * Upper-case a run of numbers together with the character that follows it.
         *
         * @param array $match
         *
         * @psalm-pure
         *
         * @return string
         */
        $upper_after_number = static function (array $match) use ($use_mb_functions, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length): string {
            if (!$use_mb_functions) {
                return self::strtoupper($match[0], $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
            }

            return $encoding === 'UTF-8'
                ? \mb_strtoupper($match[0])
                : \mb_strtoupper($match[0], $encoding);
        };

        $str = (string) \preg_replace_callback('/[-_\\s]+(.)?/u', $upper_after_separator, $str);

        return (string) \preg_replace_callback('/[\\p{N}]+(.)?/u', $upper_after_number, $str);
    }
5927
5928
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * Whitespace is collapsed first; the capitalization helper is then applied
     * with ' ' as delimiter and afterwards with '-' as delimiter.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with $str capitalized.</p>
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);
        $capitalized_words = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($capitalized_words, '-');
    }
5949
5950
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $case_sensitive to false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param string $needle         <p>Substring to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool whether or not $haystack contains $needle
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $case_sensitive = true
    ): bool {
        $position = $case_sensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        return $position !== false;
    }
5974
5975
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * An empty $haystack, an empty $needles list, or an empty / non-scalar
     * needle always yields false. The needle "0" is a regular needle.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool whether or not $haystack contains every $needle
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            if (!\is_scalar($needle)) {
                return false;
            }

            // compare against '' explicitly: a truthiness test would reject "0"
            $needle = (string) $needle;
            if ($needle === '') {
                return false;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position === false) {
                return false;
            }
        }

        return true;
    }
6013
6014
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * Empty and non-scalar needles are skipped. The needle "0" is a regular needle.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              Whether or not $haystack contains at least one $needle
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            if (!\is_scalar($needle)) {
                continue;
            }

            // compare against '' explicitly: a truthiness test would skip "0"
            $needle = (string) $needle;
            if ($needle === '') {
                continue;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
6058
6059
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * Shortcut for self::str_delimit() with '-' as delimiter.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $dasherized = self::str_delimit($str, '-', $encoding);

        return $dasherized;
    }
6075
6076
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $has_mbstring = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // step 1: mark every inner uppercase letter with a leading dash
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
        } else {
            $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
        }

        // step 2: lower-case (before the delimiter is inserted, so the delimiter keeps its case)
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;
        $str = ($use_mb_functions && $encoding === 'UTF-8')
            ? \mb_strtolower($str)
            : self::strtolower($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        // step 3: turn every run of dashes, underscores and whitespace into the delimiter
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }
6129
6130
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * EXAMPLE: <code>
     * UTF8::str_detect_encoding('中文空白'); // 'UTF-8'
     * UTF8::str_detect_encoding('Abc'); // 'ASCII'
     * </code>
     *
     * @param string $str <p>The input string (other values are cast to string).</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     */
    public static function str_detect_encoding($str)
    {
        $str = (string) $str;

        //
        // 1.) binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        if (self::is_binary($str, true)) {
            $utf32_names = [1 => 'UTF-32LE', 2 => 'UTF-32BE'];
            $utf32_variant = self::is_utf32($str, false);
            if ($utf32_variant === 1 || $utf32_variant === 2) {
                return $utf32_names[$utf32_variant];
            }

            $utf16_names = [1 => 'UTF-16LE', 2 => 'UTF-16BE'];
            $utf16_variant = self::is_utf16($str, false);
            if ($utf16_variant === 1 || $utf16_variant === 2) {
                return $utf16_names[$utf16_variant];
            }

            // is binary but not "UTF-16" or "UTF-32"
            return false;
        }

        //
        // 2.) plain ASCII
        //

        if (ASCII::is_ascii($str)) {
            return 'ASCII';
        }

        //
        // 3.) valid UTF-8
        //

        if (self::is_utf8_string($str)) {
            return 'UTF-8';
        }

        //
        // 4.) "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            $encoding_detecting_order = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($str, $encoding_detecting_order, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) "iconv()": the first encoding that round-trips the string unchanged wins
        //

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $round_trip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);
            if ($round_trip === $str) {
                return $candidate;
            }
        }

        return false;
    }
6258
6259
    /**
6260
     * alias for "UTF8::str_ends_with()"
6261
     *
6262
     * @param string $haystack
6263
     * @param string $needle
6264
     *
6265
     * @psalm-pure
6266
     *
6267
     * @return bool
6268
     *
6269
     * @see        UTF8::str_ends_with()
6270
     * @deprecated <p>please use "UTF8::str_ends_with()"</p>
6271
     */
6272
    public static function str_ends(string $haystack, string $needle): bool
    {
        // Deprecated alias: the suffix check itself is done by str_ends_with().
        $ends_with_needle = self::str_ends_with($haystack, $needle);

        return $ends_with_needle;
    }
6276
6277
    /**
6278
     * Check if the string ends with the given substring.
6279
     *
6280
     * EXAMPLE: <code>
6281
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
6282
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'κόσμε'); // false
6283
     * </code>
6284
     *
6285
     * @param string $haystack <p>The string to search in.</p>
6286
     * @param string $needle   <p>The substring to search for.</p>
6287
     *
6288
     * @psalm-pure
6289
     *
6290
     * @return bool
6291
     */
6292
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        $needle_bytes = \strlen($needle);

        // An empty needle counts as a suffix of every haystack.
        if ($needle_bytes === 0) {
            return true;
        }

        // A non-empty needle cannot end an empty haystack.
        if ($haystack === '') {
            return false;
        }

        // Case-sensitive, byte-wise comparison of the trailing bytes.
        $tail = \substr($haystack, -$needle_bytes);

        return $tail === $needle;
    }
6304
6305
    /**
6306
     * Returns true if the string ends with any of $substrings, false otherwise.
6307
     *
6308
     * - case-sensitive
6309
     *
6310
     * @param string   $str        <p>The input string.</p>
6311
     * @param string[] $substrings <p>Substrings to look for.</p>
6312
     *
6313
     * @psalm-pure
6314
     *
6315
     * @return bool whether or not $str ends with $substring
6316
     */
6317
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        // Iterate by value: nothing is modified, so no reference is needed.
        // An empty list simply skips the loop and yields false.
        foreach ($substrings as $substring) {
            // An empty substring is a suffix of every string (same rule as str_ends_with()).
            // Without this guard "substr($str, -0)" returns the whole string and never matches.
            if ($substring === '') {
                return true;
            }

            // Case-sensitive, byte-wise comparison of the trailing bytes.
            if (\substr($str, -\strlen($substring)) === $substring) {
                return true;
            }
        }

        return false;
    }
6331
6332
    /**
6333
     * Ensures that the string begins with $substring. If it doesn't, it's
6334
     * prepended.
6335
     *
6336
     * @param string $str       <p>The input string.</p>
6337
     * @param string $substring <p>The substring to add if not present.</p>
6338
     *
6339
     * @psalm-pure
6340
     *
6341
     * @return string
6342
     */
6343
    public static function str_ensure_left(string $str, string $substring): string
    {
        // Only a non-empty prefix that is already at position 0 leaves the string untouched.
        $already_prefixed = $substring !== '' && \strpos($str, $substring) === 0;

        return $already_prefixed ? $str : $substring . $str;
    }
6355
6356
    /**
6357
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
6358
     *
6359
     * @param string $str       <p>The input string.</p>
6360
     * @param string $substring <p>The substring to add if not present.</p>
6361
     *
6362
     * @psalm-pure
6363
     *
6364
     * @return string
6365
     */
6366
    public static function str_ensure_right(string $str, string $substring): string
    {
        // The suffix counts as present only when both strings are non-empty
        // and the trailing bytes of $str equal $substring.
        $already_suffixed = $str !== ''
            && $substring !== ''
            && \substr($str, -\strlen($substring)) === $substring;

        if ($already_suffixed) {
            return $str;
        }

        return $str . $substring;
    }
6380
6381
    /**
6382
     * Capitalizes the first word of the string, replaces underscores with
6383
     * spaces, and strips '_id'.
6384
     *
6385
     * @param string $str
6386
     *
6387
     * @psalm-pure
6388
     *
6389
     * @return string
6390
     */
6391
    public static function str_humanize($str): string
    {
        // Order matters: "_id" is removed first, afterwards every remaining underscore becomes a space.
        $replacements = [
            '_id' => '',
            '_'   => ' ',
        ];

        $humanized = \str_replace(
            \array_keys($replacements),
            \array_values($replacements),
            $str
        );

        // Trim the result and upper-case its first character.
        return self::ucfirst(\trim($humanized));
    }
6407
6408
    /**
6409
     * alias for "UTF8::str_istarts_with()"
6410
     *
6411
     * @param string $haystack
6412
     * @param string $needle
6413
     *
6414
     * @psalm-pure
6415
     *
6416
     * @return bool
6417
     *
6418
     * @see        UTF8::str_istarts_with()
6419
     * @deprecated <p>please use "UTF8::str_istarts_with()"</p>
6420
     */
6421
    public static function str_ibegins(string $haystack, string $needle): bool
    {
        // Deprecated alias: the case-insensitive prefix check is done by str_istarts_with().
        $starts_with_needle = self::str_istarts_with($haystack, $needle);

        return $starts_with_needle;
    }
6425
6426
    /**
6427
     * alias for "UTF8::str_iends_with()"
6428
     *
6429
     * @param string $haystack
6430
     * @param string $needle
6431
     *
6432
     * @psalm-pure
6433
     *
6434
     * @return bool
6435
     *
6436
     * @see        UTF8::str_iends_with()
6437
     * @deprecated <p>please use "UTF8::str_iends_with()"</p>
6438
     */
6439
    public static function str_iends(string $haystack, string $needle): bool
    {
        // Deprecated alias: the case-insensitive suffix check is done by str_iends_with().
        $ends_with_needle = self::str_iends_with($haystack, $needle);

        return $ends_with_needle;
    }
6443
6444
    /**
6445
     * Check if the string ends with the given substring, case-insensitive.
6446
     *
6447
     * EXAMPLE: <code>
6448
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
6449
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'κόσμε'); // true
6450
     * </code>
6451
     *
6452
     * @param string $haystack <p>The string to search in.</p>
6453
     * @param string $needle   <p>The substring to search for.</p>
6454
     *
6455
     * @psalm-pure
6456
     *
6457
     * @return bool
6458
     */
6459
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        // An empty needle counts as a suffix of every haystack.
        if ($needle === '') {
            return true;
        }

        // A non-empty needle cannot end an empty haystack.
        if ($haystack === '') {
            return false;
        }

        // Cut the tail by characters, not bytes: two case variants of the same letter may
        // have different byte lengths, so a byte-sized slice can start in the middle of a
        // multi-byte character and make the comparison fail.
        $needle_length = (int) \mb_strlen($needle, 'UTF-8');
        $tail = (string) \mb_substr($haystack, -$needle_length, null, 'UTF-8');

        return self::strcasecmp($tail, $needle) === 0;
    }
6471
6472
    /**
6473
     * Returns true if the string ends with any of $substrings, false otherwise.
6474
     *
6475
     * - case-insensitive
6476
     *
6477
     * @param string   $str        <p>The input string.</p>
6478
     * @param string[] $substrings <p>Substrings to look for.</p>
6479
     *
6480
     * @psalm-pure
6481
     *
6482
     * @return bool
6483
     *              <p>Whether or not $str ends with $substring.</p>
6484
     */
6485
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // The first candidate that is a case-insensitive suffix decides the result;
        // an empty candidate list never enters the loop.
        foreach ($substrings as $candidate) {
            if (self::str_iends_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
6499
6500
    /**
6501
     * Returns the index of the first occurrence of $needle in the string,
6502
     * and false if not found. Accepts an optional offset from which to begin
6503
     * the search.
6504
     *
6505
     * @param string $str      <p>The input string.</p>
6506
     * @param string $needle   <p>Substring to look for.</p>
6507
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
6508
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6509
     *
6510
     * @psalm-pure
6511
     *
6512
     * @return false|int
6513
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
6514
     *
6515
     * @see        UTF8::stripos()
6516
     * @deprecated <p>please use "UTF8::stripos()"</p>
6517
     */
6518
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Deprecated alias: all arguments are forwarded unchanged to stripos().
        $first_index = self::stripos($str, $needle, $offset, $encoding);

        return $first_index;
    }
6531
6532
    /**
6533
     * Returns the index of the last occurrence of $needle in the string,
6534
     * and false if not found. Accepts an optional offset from which to begin
6535
     * the search. Offsets may be negative to count from the last character
6536
     * in the string.
6537
     *
6538
     * @param string $str      <p>The input string.</p>
6539
     * @param string $needle   <p>Substring to look for.</p>
6540
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
6541
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6542
     *
6543
     * @psalm-pure
6544
     *
6545
     * @return false|int
6546
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
6547
     *
6548
     * @see        UTF8::strripos()
6549
     * @deprecated <p>please use "UTF8::strripos()"</p>
6550
     */
6551
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Deprecated alias: all arguments are forwarded unchanged to strripos().
        $last_index = self::strripos($str, $needle, $offset, $encoding);

        return $last_index;
    }
6564
6565
    /**
6566
     * Returns the index of the first occurrence of $needle in the string,
6567
     * and false if not found. Accepts an optional offset from which to begin
6568
     * the search.
6569
     *
6570
     * @param string $str      <p>The input string.</p>
6571
     * @param string $needle   <p>Substring to look for.</p>
6572
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
6573
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6574
     *
6575
     * @psalm-pure
6576
     *
6577
     * @return false|int
6578
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
6579
     *
6580
     * @see        UTF8::strpos()
6581
     * @deprecated <p>please use "UTF8::strpos()"</p>
6582
     */
6583
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Deprecated alias: all arguments are forwarded unchanged to strpos().
        $first_index = self::strpos($str, $needle, $offset, $encoding);

        return $first_index;
    }
6596
6597
    /**
6598
     * Returns the index of the last occurrence of $needle in the string,
6599
     * and false if not found. Accepts an optional offset from which to begin
6600
     * the search. Offsets may be negative to count from the last character
6601
     * in the string.
6602
     *
6603
     * @param string $str      <p>The input string.</p>
6604
     * @param string $needle   <p>Substring to look for.</p>
6605
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
6606
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6607
     *
6608
     * @psalm-pure
6609
     *
6610
     * @return false|int
6611
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
6612
     *
6613
     * @see        UTF8::strrpos()
6614
     * @deprecated <p>please use "UTF8::strrpos()"</p>
6615
     */
6616
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Deprecated alias: all arguments are forwarded unchanged to strrpos().
        $last_index = self::strrpos($str, $needle, $offset, $encoding);

        return $last_index;
    }
6629
6630
    /**
6631
     * Inserts $substring into the string at the $index provided.
6632
     *
6633
     * @param string $str       <p>The input string.</p>
6634
     * @param string $substring <p>String to be inserted.</p>
6635
     * @param int    $index     <p>The index at which to insert the substring.</p>
6636
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
6637
     *
6638
     * @psalm-pure
6639
     *
6640
     * @return string
6641
     */
6642
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            // Generic path: normalise the charset name, then use the encoding-aware helpers.
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $char_count = (int) self::strlen($str, $encoding);
            if ($index > $char_count) {
                // An index behind the end leaves the string untouched.
                return $str;
            }

            $head = (string) self::substr($str, 0, $index, $encoding);
            $tail = (string) self::substr($str, $index, $char_count, $encoding);

            return $head . $substring . $tail;
        }

        // UTF-8 path: call the mb_* functions directly.
        $char_count = (int) \mb_strlen($str);
        if ($index > $char_count) {
            // An index behind the end leaves the string untouched.
            return $str;
        }

        $head = (string) \mb_substr($str, 0, $index);
        $tail = (string) \mb_substr($str, $index, $char_count);

        return $head . $substring . $tail;
    }
6671
6672
    /**
6673
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
6674
     *
6675
     * EXAMPLE: <code>
6676
     * UTF8::str_ireplace('lIzÆ', 'lise', 'Iñtërnâtiônàlizætiøn'); // 'Iñtërnâtiônàlisetiøn'
6677
     * </code>
6678
     *
6679
     * @see http://php.net/manual/en/function.str-ireplace.php
6680
     *
6681
     * @param string|string[] $search      <p>
6682
     *                                     Every replacement with search array is
6683
     *                                     performed on the result of previous replacement.
6684
     *                                     </p>
6685
     * @param string|string[] $replacement <p>The replacement.</p>
6686
     * @param string|string[] $subject     <p>
6687
     *                                     If subject is an array, then the search and
6688
     *                                     replace is performed with every entry of
6689
     *                                     subject, and the return value is an array as
6690
     *                                     well.
6691
     *                                     </p>
6692
     * @param int             $count       [optional] <p>
6693
     *                                     The number of matched and replaced needles will
6694
     *                                     be returned in count which is passed by
6695
     *                                     reference.
6696
     *                                     </p>
6697
     *
6698
     * @psalm-pure
6699
     *
6700
     * @return string|string[] a string or an array of replacements
6701
     *
6702
     * @template TStrIReplaceSubject
6703
     * @psalm-param TStrIReplaceSubject $subject
6704
     * @psalm-return TStrIReplaceSubject
6705
     */
6706
    public static function str_ireplace($search, $replacement, $subject, &$count = null)
    {
        // Turn every search term into a case-insensitive, UTF-8 aware regular expression.
        $patterns = \array_map(
            static function ($term): string {
                $term = (string) $term;

                // An empty term must never match: this pattern asks for a character
                // in front of the start of the subject, which is impossible.
                return $term === ''
                    ? '/^(?<=.)$/'
                    : '/' . \preg_quote($term, '/') . '/ui';
            },
            (array) $search
        );

        // preg_replace() expands "$n" and "\n" inside the replacement as back-references.
        // This method promises plain str_replace() semantics, so "\" and "$" are escaped
        // and the replacement is inserted literally.
        $quote_replacement = static function ($value): string {
            return \addcslashes((string) $value, '\\$');
        };
        $replacement = \is_array($replacement)
            ? \array_map($quote_replacement, $replacement)
            : $quote_replacement($replacement);

        /**
         * @psalm-suppress PossiblyNullArgument
         * @psalm-var TStrIReplaceSubject $subject
         */
        $subject = \preg_replace($patterns, $replacement, $subject, -1, $count);

        return $subject;
    }
6728
6729
    /**
6730
     * Replaces $search from the beginning of string with $replacement.
6731
     *
6732
     * @param string $str         <p>The input string.</p>
6733
     * @param string $search      <p>The string to search for.</p>
6734
     * @param string $replacement <p>The replacement.</p>
6735
     *
6736
     * @psalm-pure
6737
     *
6738
     * @return string string after the replacements
6739
     */
6740
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        // Without a search term there is nothing to anchor on: the replacement is appended
        // (for an empty $str this yields just the replacement).
        if ($search === '') {
            return $str . $replacement;
        }

        // A non-empty search term cannot be found in an empty string.
        if ($str === '') {
            return '';
        }

        // Compare only the leading characters, and do it with the multibyte functions:
        // the native stripos() folds ASCII letters only, so e.g. "Ä" vs "ä" would not match,
        // and it would scan the whole string although only the prefix matters.
        $search_length = (int) \mb_strlen($search, 'UTF-8');
        $prefix = (string) \mb_substr($str, 0, $search_length, 'UTF-8');

        if (\mb_stripos($prefix, $search, 0, 'UTF-8') === 0) {
            return $replacement . \mb_substr($str, $search_length, null, 'UTF-8');
        }

        return $str;
    }
6762
6763
    /**
6764
     * Replaces $search from the ending of string with $replacement.
6765
     *
6766
     * @param string $str         <p>The input string.</p>
6767
     * @param string $search      <p>The string to search for.</p>
6768
     * @param string $replacement <p>The replacement.</p>
6769
     *
6770
     * @psalm-pure
6771
     *
6772
     * @return string
6773
     *                <p>string after the replacements.</p>
6774
     */
6775
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        // Without a search term the replacement is simply appended
        // (for an empty $str this yields just the replacement).
        if ($search === '') {
            return $str . $replacement;
        }

        // A non-empty search term cannot be found in an empty string.
        if ($str === '') {
            return '';
        }

        $str_length = (int) \mb_strlen($str, 'UTF-8');
        $search_length = (int) \mb_strlen($search, 'UTF-8');

        // A search term longer than the string can never be its ending. Checking this up
        // front also avoids an out-of-range offset, which stripos() rejects with a
        // ValueError on PHP 8.
        if ($search_length > $str_length) {
            return $str;
        }

        // Compare only the trailing characters, using the multibyte functions so that
        // non-ASCII letters are matched case-insensitively as well.
        $kept_length = $str_length - $search_length;
        $suffix = (string) \mb_substr($str, $kept_length, null, 'UTF-8');

        if (\mb_stripos($suffix, $search, 0, 'UTF-8') === 0) {
            return \mb_substr($str, 0, $kept_length, 'UTF-8') . $replacement;
        }

        return $str;
    }
6797
6798
    /**
6799
     * Check if the string starts with the given substring, case-insensitive.
6800
     *
6801
     * EXAMPLE: <code>
6802
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
6803
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'κόσμε'); // true
6804
     * </code>
6805
     *
6806
     * @param string $haystack <p>The string to search in.</p>
6807
     * @param string $needle   <p>The substring to search for.</p>
6808
     *
6809
     * @psalm-pure
6810
     *
6811
     * @return bool
6812
     */
6813
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        // An empty needle counts as a prefix of every haystack.
        if ($needle === '') {
            return true;
        }

        // A non-empty needle cannot start an empty haystack.
        if ($haystack === '') {
            return false;
        }

        // The needle is a prefix exactly when its first case-insensitive match is at index 0.
        $first_match = self::stripos($haystack, $needle);

        return $first_match === 0;
    }
6825
6826
    /**
6827
     * Returns true if the string begins with any of $substrings, false otherwise.
6828
     *
6829
     * - case-insensitive
6830
     *
6831
     * @param string $str        <p>The input string.</p>
6832
     * @param array  $substrings <p>Substrings to look for.</p>
6833
     *
6834
     * @psalm-pure
6835
     *
6836
     * @return bool whether or not $str starts with $substring
6837
     */
6838
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        // An empty input string or an empty candidate list can never produce a match.
        if ($str === '' || $substrings === []) {
            return false;
        }

        // The first candidate that is a case-insensitive prefix decides the result.
        foreach ($substrings as $candidate) {
            if (self::str_istarts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
6856
6857
    /**
6858
     * Gets the substring after the first occurrence of a separator.
6859
     *
6860
     * @param string $str       <p>The input string.</p>
6861
     * @param string $separator <p>The string separator.</p>
6862
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6863
     *
6864
     * @psalm-pure
6865
     *
6866
     * @return string
6867
     */
6868
    public static function str_isubstr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing can follow a separator that is empty or that is searched in an empty string.
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = self::stripos($str, $separator);
            if ($offset === false) {
                return '';
            }

            // Skip the separator itself and return the rest of the string.
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        // Search with the same charset that is used for slicing below; otherwise the
        // offset would be counted in UTF-8 characters while the slice is taken in $encoding.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6896
6897
    /**
6898
     * Gets the substring after the last occurrence of a separator.
6899
     *
6900
     * @param string $str       <p>The input string.</p>
6901
     * @param string $separator <p>The string separator.</p>
6902
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6903
     *
6904
     * @psalm-pure
6905
     *
6906
     * @return string
6907
     */
6908
    public static function str_isubstr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing can follow a separator that is empty or that is searched in an empty string.
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = self::strripos($str, $separator);
            if ($offset === false) {
                return '';
            }

            // Skip the separator itself and return the rest of the string.
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        // Search with the same charset that is used for slicing below; otherwise the
        // offset would be counted in UTF-8 characters while the slice is taken in $encoding.
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6936
6937
    /**
6938
     * Gets the substring before the first occurrence of a separator.
6939
     *
6940
     * @param string $str       <p>The input string.</p>
6941
     * @param string $separator <p>The string separator.</p>
6942
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6943
     *
6944
     * @psalm-pure
6945
     *
6946
     * @return string
6947
     */
6948
    public static function str_isubstr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing can precede a separator that is empty or that is searched in an empty string.
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = self::stripos($str, $separator);
            if ($offset === false) {
                return '';
            }

            // Everything in front of the first (case-insensitive) match.
            return (string) \mb_substr($str, 0, $offset);
        }

        // Search with the same charset that is used for slicing below; otherwise the
        // offset would be counted in UTF-8 characters while the slice is taken in $encoding.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6968
6969
    /**
6970
     * Gets the substring before the last occurrence of a separator.
6971
     *
6972
     * @param string $str       <p>The input string.</p>
6973
     * @param string $separator <p>The string separator.</p>
6974
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6975
     *
6976
     * @psalm-pure
6977
     *
6978
     * @return string
6979
     */
6980
    public static function str_isubstr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing can precede a separator that is empty or that is searched in an empty string.
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // Locate the last case-insensitive match: mb_* directly for UTF-8,
        // the encoding-aware helper for every other charset.
        $offset = $is_utf8
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);

        if ($offset === false) {
            return '';
        }

        // Everything in front of that match.
        return $is_utf8
            ? (string) \mb_substr($str, 0, $offset)
            : (string) self::substr($str, 0, $offset, $encoding);
    }
7005
7006
    /**
7007
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
7008
     *
7009
     * @param string $str           <p>The input string.</p>
7010
     * @param string $needle        <p>The string to look for.</p>
7011
     * @param bool   $before_needle [optional] <p>Default: false</p>
7012
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
7013
     *
7014
     * @psalm-pure
7015
     *
7016
     * @return string
7017
     */
7018
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        // Empty input or an empty needle always yields an empty result.
        if ($str === '' || $needle === '') {
            return '';
        }

        $part = self::stristr($str, $needle, $before_needle, $encoding);

        // stristr() reports "not found" as false; this method returns '' instead.
        return $part === false ? '' : $part;
    }
7044
7045
    /**
7046
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
7047
     *
7048
     * @param string $str           <p>The input string.</p>
7049
     * @param string $needle        <p>The string to look for.</p>
7050
     * @param bool   $before_needle [optional] <p>Default: false</p>
7051
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
7052
     *
7053
     * @psalm-pure
7054
     *
7055
     * @return string
7056
     */
7057
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        // Empty input or an empty needle always yields an empty result.
        if ($str === '' || $needle === '') {
            return '';
        }

        $part = self::strrichr($str, $needle, $before_needle, $encoding);

        // strrichr() reports "not found" as false; this method returns '' instead.
        return $part === false ? '' : $part;
    }
7083
7084
    /**
7085
     * Returns the last $n characters of the string.
7086
     *
7087
     * @param string $str      <p>The input string.</p>
7088
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
7089
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7090
     *
7091
     * @psalm-pure
7092
     *
7093
     * @return string
7094
     */
7095
    public static function str_last_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // Asking for zero or fewer characters, or for characters of an empty string, yields ''.
        if ($n <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            // Generic path: normalise the charset name and use the encoding-aware helper.
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $encoding);
        }

        // UTF-8 path: a negative start counts from the end of the string.
        return (string) \mb_substr($str, -$n);
    }
7112
7113
    /**
7114
     * Limit the number of characters in a string.
7115
     *
7116
     * @param string $str        <p>The input string.</p>
7117
     * @param int    $length     [optional] <p>Default: 100</p>
7118
     * @param string $str_add_on [optional] <p>Default: …</p>
7119
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
7120
     *
7121
     * @psalm-pure
7122
     *
7123
     * @return string
7124
     */
7125
    public static function str_limit(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        // An empty string or a non-positive limit always yields ''.
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // Short enough already: return the string unchanged, without the add-on.
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Reserve room for the add-on, but never let the kept length become negative:
            // a negative length would make mb_substr() cut from the end of the string and
            // return far more than $length characters.
            $kept_length = \max(0, $length - (int) \mb_strlen($str_add_on));

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $kept_length) . $str_add_on;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // Short enough already: return the string unchanged, without the add-on.
        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // Same clamping as above; the add-on is measured in the same charset as the string.
        $kept_length = \max(0, $length - (int) self::strlen($str_add_on, $encoding));

        return ((string) self::substr($str, 0, $kept_length, $encoding)) . $str_add_on;
    }
7152
7153
    /**
     * Limit the number of characters in a string, cutting at a word boundary (a space) where possible.
     *
     * EXAMPLE: <code>UTF8::str_limit_after_word('fòô bàř fòô', 8, ''); // 'fòô bàř'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>An empty string if $str is empty or $length is <= 0, the unchanged string
     *                if it already fits, otherwise the shortened string followed by $str_add_on.</p>
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // The limit falls exactly on a space: cut right before it.
            if (\mb_substr($str, $length - 1, 1) === ' ') {
                return ((string) \mb_substr($str, 0, $length - 1)) . $str_add_on;
            }

            $truncated = (string) \mb_substr($str, 0, $length);

            // Drop the last (possibly cut-off) word; a negative limit makes explode() omit the final element.
            $new_str = \implode(' ', \explode(' ', $truncated, -1));

            if ($new_str === '') {
                // No usable word boundary: fall back to a hard cut.
                return ((string) \mb_substr($truncated, 0, $length - 1)) . $str_add_on;
            }

            return $new_str . $str_add_on;
        }

        // Normalize the encoding name, as the sibling "str_*" methods do.
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // The limit falls exactly on a space: cut right before it.
        if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
            return ((string) self::substr($str, 0, $length - 1, $encoding)) . $str_add_on;
        }

        $truncated = self::substr($str, 0, $length, $encoding);
        if ($truncated === false) {
            return '' . $str_add_on;
        }

        // Drop the last (possibly cut-off) word; a negative limit makes explode() omit the final element.
        $new_str = \implode(' ', \explode(' ', $truncated, -1));

        if ($new_str === '') {
            // No usable word boundary: fall back to a hard cut.
            return ((string) self::substr($truncated, 0, $length - 1, $encoding)) . $str_add_on;
        }

        return $new_str . $str_add_on;
    }
7221
7222
    /**
     * Returns the longest common prefix between the $str1 and $str2.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The shared leading characters, or an empty string if there are none.</p>
     */
    public static function str_longest_common_prefix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        // init
        $longest_common_prefix = '';

        if ($encoding === 'UTF-8') {
            // Only the length of the shorter string needs to be inspected.
            $max_length = (int) \min(
                \mb_strlen($str1),
                \mb_strlen($str2)
            );

            for ($i = 0; $i < $max_length; ++$i) {
                $char = \mb_substr($str1, $i, 1);

                // Stop at the first position where the characters differ.
                if ($char === false || $char !== \mb_substr($str2, $i, 1)) {
                    break;
                }

                $longest_common_prefix .= $char;
            }

            return $longest_common_prefix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // Only the length of the shorter string needs to be inspected.
        $max_length = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        for ($i = 0; $i < $max_length; ++$i) {
            $char = self::substr($str1, $i, 1, $encoding);

            // Stop at the first position where the characters differ.
            if ($char === false || $char !== self::substr($str2, $i, 1, $encoding)) {
                break;
            }

            $longest_common_prefix .= $char;
        }

        return $longest_common_prefix;
    }
7285
7286
    /**
     * Returns the longest common substring between the $str1 and $str2.
     * In the case of ties, it returns that which occurs first in $str1.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The longest common substring, or an empty string if there is none.</p>
     */
    public static function str_longest_common_substring(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        // Uses dynamic programming to solve
        // http://en.wikipedia.org/wiki/Longest_common_substring_problem

        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str1);
            $other_length = (int) \mb_strlen($str2);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str_length = (int) self::strlen($str1, $encoding);
            $other_length = (int) self::strlen($str2, $encoding);
        }

        // Return if either string is empty
        if ($str_length === 0 || $other_length === 0) {
            return '';
        }

        // Evaluated after normalization, so an alias that normalizes to "UTF-8" also takes the "mb_" path.
        $use_mb = $encoding === 'UTF-8';

        // Split both strings into characters once, instead of re-extracting them for every table cell.
        $str_chars = [];
        for ($i = 0; $i < $str_length; ++$i) {
            $str_chars[] = $use_mb ? \mb_substr($str1, $i, 1) : self::substr($str1, $i, 1, $encoding);
        }

        $other_chars = [];
        for ($j = 0; $j < $other_length; ++$j) {
            $other_chars[] = $use_mb ? \mb_substr($str2, $j, 1) : self::substr($str2, $j, 1, $encoding);
        }

        // $len: length of the best match so far, $end: 1-based end position of that match in $str1.
        $len = 0;
        $end = 0;

        // Each table row depends only on the previous row, so two rows are enough.
        $previous_row = \array_fill(0, $other_length + 1, 0);

        for ($i = 1; $i <= $str_length; ++$i) {
            $current_row = [0];
            $str_char = $str_chars[$i - 1];

            for ($j = 1; $j <= $other_length; ++$j) {
                if ($str_char === $other_chars[$j - 1]) {
                    // Extend the common run that ended at the previous character of both strings.
                    $run = $previous_row[$j - 1] + 1;

                    // Strictly greater: on ties the earliest match wins.
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $run = 0;
                }

                $current_row[$j] = $run;
            }

            $previous_row = $current_row;
        }

        if ($use_mb) {
            return (string) \mb_substr($str1, $end - $len, $len);
        }

        return (string) self::substr($str1, $end - $len, $len, $encoding);
    }
7376
7377
    /**
     * Returns the longest common suffix between the $str1 and $str2.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The shared trailing characters, or an empty string if there are none.</p>
     */
    public static function str_longest_common_suffix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        // init
        $longest_common_suffix = '';

        if ($encoding === 'UTF-8') {
            // Only the length of the shorter string needs to be inspected.
            $max_length = (int) \min(
                \mb_strlen($str1, $encoding),
                \mb_strlen($str2, $encoding)
            );

            // Walk backwards from the end: a negative offset counts from the end of the string.
            for ($i = 1; $i <= $max_length; ++$i) {
                $char = \mb_substr($str1, -$i, 1);

                // Stop at the first position where the characters differ.
                if ($char === false || $char !== \mb_substr($str2, -$i, 1)) {
                    break;
                }

                $longest_common_suffix = $char . $longest_common_suffix;
            }

            return $longest_common_suffix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // Only the length of the shorter string needs to be inspected.
        $max_length = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        // Walk backwards from the end: a negative offset counts from the end of the string.
        for ($i = 1; $i <= $max_length; ++$i) {
            $char = self::substr($str1, -$i, 1, $encoding);

            // Stop at the first position where the characters differ.
            if ($char === false || $char !== self::substr($str2, -$i, 1, $encoding)) {
                break;
            }

            $longest_common_suffix = $char . $longest_common_suffix;
        }

        return $longest_common_suffix;
    }
7443
7444
    /**
     * Returns true if $str matches the supplied pattern, false otherwise.
     *
     * The pattern is wrapped in "/" delimiters and evaluated with the "u" modifier,
     * so a literal "/" inside $pattern has to be escaped by the caller.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against (without delimiters).</p>
     *
     * @psalm-pure
     *
     * @return bool whether or not $str matches the pattern
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        // preg_match() yields 1 on a match, 0 on no match and false on error; only 1 casts to true.
        return (bool) \preg_match('/' . $pattern . '/u', $str);
    }
7458
7459
    /**
     * Returns whether or not a character exists at an index. Offsets may be
     * negative to count from the last character in the string. Implements
     * part of the ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return bool whether or not the index exists
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        // init
        $length = (int) self::strlen($str, $encoding);

        if ($offset < 0) {
            // Negative offsets are 1-based from the end, so -$length is still valid.
            return $length >= \abs($offset);
        }

        // Non-negative offsets are 0-based, so the last valid one is $length - 1.
        return $length > $offset;
    }
7483
7484
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string
     *                <p>The character at the specified index.</p>
     *
     * @psalm-pure
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Measure the string in the same encoding that is used for the lookup below,
        // otherwise the bounds check and the actual access can disagree.
        $length = (int) self::strlen($str, $encoding);

        if (
            ($index >= 0 && $length <= $index)
            ||
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
7516
7517
    /**
     * Pad a UTF-8 string to a given length with another string.
     *
     * EXAMPLE: <code>UTF8::str_pad('中文空白', 10, '_', STR_PAD_BOTH); // '___中文空白___'</code>
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is a non-integer value other than 'left', 'right' or 'both'
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Returns the padded string, or the unchanged input if $pad_length is 0,
     *                $pad_string is empty or $pad_length is smaller than the length of $str.</p>
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        // Translate the string aliases into the native STR_PAD_* constants.
        if ($pad_type !== (int) $pad_type) {
            if ($pad_type === 'left') {
                $pad_type = \STR_PAD_LEFT;
            } elseif ($pad_type === 'right') {
                $pad_type = \STR_PAD_RIGHT;
            } elseif ($pad_type === 'both') {
                $pad_type = \STR_PAD_BOTH;
            } else {
                throw new \InvalidArgumentException(
                    'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }
        }

        // Decided before normalization: only a literal "UTF-8" takes the direct "mb_" path.
        $use_mb = $encoding === 'UTF-8';

        if ($use_mb) {
            $str_length = (int) \mb_strlen($str);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $str_length = (int) self::strlen($str, $encoding);
        }

        if ($pad_length < $str_length) {
            return $str;
        }

        // Number of padding characters that are needed in total.
        $diff = $pad_length - $str_length;

        // Distribute the padding; any other integer value behaves like STR_PAD_RIGHT.
        switch ($pad_type) {
            case \STR_PAD_LEFT:
                $left_width = $diff;
                $right_width = 0;

                break;

            case \STR_PAD_BOTH:
                // With an odd difference the extra character goes to the right side.
                $left_width = (int) \floor($diff / 2);
                $right_width = $diff - $left_width;

                break;

            case \STR_PAD_RIGHT:
            default:
                $left_width = 0;
                $right_width = $diff;
        }

        // Guard against a zero length so the repeat count below can never divide by zero.
        $ps_length = \max(
            1,
            $use_mb ? (int) \mb_strlen($pad_string) : (int) self::strlen($pad_string, $encoding)
        );

        // Builds exactly $width characters of padding by repeating $pad_string and cutting off the surplus.
        $build_padding = static function (int $width) use ($pad_string, $ps_length, $use_mb, $encoding): string {
            if ($width <= 0) {
                return '';
            }

            $repeated = \str_repeat($pad_string, (int) \ceil($width / $ps_length));

            if ($use_mb) {
                return (string) \mb_substr($repeated, 0, $width);
            }

            return (string) self::substr($repeated, 0, $width, $encoding);
        };

        return $build_padding($left_width) . $str . $build_padding($right_width);
    }
7684
7685
    /**
     * Returns a new string of a given length such that both sides of the
     * string are padded. Alias for "UTF8::str_pad()" with a $pad_type of 'both'.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with padding applied.</p>
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_BOTH, $encoding);
    }
7713
7714
    /**
     * Returns a new string of a given length such that the beginning of the
     * string is padded. Alias for "UTF8::str_pad()" with a $pad_type of 'left'.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with left padding.</p>
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_LEFT, $encoding);
    }
7742
7743
    /**
     * Returns a new string of a given length such that the end of the string
     * is padded. Alias for "UTF8::str_pad()" with a $pad_type of 'right'.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with right padding.</p>
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_RIGHT, $encoding);
    }
7771
7772
    /**
     * Repeat a string.
     *
     * The input is passed through "UTF8::filter()" before it is repeated.
     *
     * EXAMPLE: <code>UTF8::str_repeat("°~\xf0\x90\x28\xbc", 2); // '°~ð(¼°~ð(¼'</code>
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of times the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The repeated string.</p>
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
7801
7802
    /**
     * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param string|string[] $search  <p>
     *                                 The value being searched for, otherwise known as the needle.
     *                                 An array may be used to designate multiple needles.
     *                                 </p>
     * @param string|string[] $replace <p>
     *                                 The replacement value that replaces found search
     *                                 values. An array may be used to designate multiple replacements.
     *                                 </p>
     * @param string|string[] $subject <p>
     *                                 The string or array of strings being searched and replaced on,
     *                                 otherwise known as the haystack.
     *                                 </p>
     *                                 <p>
     *                                 If subject is an array, then the search and
     *                                 replace is performed with every entry of
     *                                 subject, and the return value is an array as
     *                                 well.
     *                                 </p>
     * @param int             $count   [optional] If passed, this will hold the number of matched and replaced needles
     *
     * @psalm-pure
     *
     * @return string|string[] this function returns a string or an array with the replaced values
     *
     * @template TStrReplaceSubject
     * @psalm-param TStrReplaceSubject $subject
     * @psalm-return TStrReplaceSubject
     *
     * @deprecated please use \str_replace() instead
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        /**
         * @psalm-suppress PossiblyNullArgument
         * @psalm-var TStrReplaceSubject $return;
         */
        $return = \str_replace($search, $replace, $subject, $count);

        return $return;
    }
7858
7859
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * The comparison is done on bytes; $str is returned unchanged if it does not start with $search.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_beginning(
        string $str,
        string $search,
        string $replacement
    ): string {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        if ($search === '') {
            // NOTE(review): an empty $search appends the replacement (same as "str_replace_ending()");
            // prepending may have been intended here - kept as-is, confirm against callers.
            return $str . $replacement;
        }

        // Compare only the leading bytes instead of scanning the whole string for the needle.
        $search_length = \strlen($search);
        if (\strncmp($str, $search, $search_length) === 0) {
            return $replacement . \substr($str, $search_length);
        }

        return $str;
    }
7896
7897
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * The comparison is done on bytes; $str is returned unchanged if it does not end with $search.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_ending(
        string $str,
        string $search,
        string $replacement
    ): string {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        if ($search === '') {
            return $str . $replacement;
        }

        $search_length = \strlen($search);

        // Compare the trailing bytes directly: unlike strpos() with a computed offset this can
        // never run out of range when $search is longer than $str (substr() then simply yields
        // a shorter string that cannot be identical to $search).
        if (\substr($str, -$search_length) === $search) {
            return \substr($str, 0, -$search_length) . $replacement;
        }

        return $str;
    }
7934
7935
    /**
     * Replace the first "$search"-term with the "$replace"-term.
     *
     * @param string $search  <p>The string to search for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string being searched and replaced on.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string after the replacement, or the unchanged $subject if $search was not found.</p>
     */
    public static function str_replace_first(
        string $search,
        string $replace,
        string $subject
    ): string {
        $pos = self::strpos($subject, $search);

        if ($pos === false) {
            return $subject;
        }

        $result = self::substr_replace(
            $subject,
            $replace,
            $pos,
            (int) self::strlen($search)
        );

        // "substr_replace()" is typed as string|string[]; for a string subject only the string
        // case is expected, so anything else falls back to the unchanged subject.
        return \is_string($result) ? $result : $subject;
    }
7969
7970
    /**
     * Replace the last "$search"-term with the "$replace"-term.
     *
     * @param string $search  <p>The string to search for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string being searched and replaced on.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string after the replacement, or the unchanged $subject if $search was not found.</p>
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $pos = self::strrpos($subject, $search);

        if ($pos === false) {
            return $subject;
        }

        $result = self::substr_replace(
            $subject,
            $replace,
            $pos,
            (int) self::strlen($search)
        );

        // "substr_replace()" is typed as string|string[]; for a string subject only the string
        // case is expected, so anything else falls back to the unchanged subject.
        return \is_string($result) ? $result : $subject;
    }
8003
8004
    /**
     * Shuffles all the characters in the string.
     *
     * INFO: uses random algorithm which is weak for cryptography purposes
     *
     * EXAMPLE: <code>UTF8::str_shuffle('fòô bàř fòô'); // 'àòôřb ffòô '</code>
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shuffled string. An empty input yields an empty string.</p>
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        // Without this guard range(0, -1) would produce [0, -1] and trigger
        // two pointless substring calls on an empty string.
        if ($str === '') {
            return '';
        }

        // init
        $shuffled_str = '';

        if ($encoding === 'UTF-8') {
            // Shuffle the character positions, then rebuild the string in that order.
            $indexes = \range(0, (int) \mb_strlen($str) - 1);
            /** @noinspection NonSecureShuffleUsageInspection */
            \shuffle($indexes);

            // Iterate by value: the positions are only read, never modified.
            foreach ($indexes as $i) {
                $tmp_sub_str = \mb_substr($str, $i, 1);
                if ($tmp_sub_str !== false) {
                    $shuffled_str .= $tmp_sub_str;
                }
            }

            return $shuffled_str;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $indexes = \range(0, (int) self::strlen($str, $encoding) - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($indexes);

        foreach ($indexes as $i) {
            $tmp_sub_str = self::substr($str, $i, 1, $encoding);
            if ($tmp_sub_str !== false) {
                $shuffled_str .= $tmp_sub_str;
            }
        }

        return $shuffled_str;
    }
8053
8054
    /**
     * Extracts the part of the string from index $start up to, but not
     * including, index $end. Without $end the rest of the string is taken.
     * A negative $end is counted from the end of the string.
     *
     * NOTE(review): the length is derived as "$end - $start" (or
     * "strlen + $end - $start"), so a negative $start combined with an explicit
     * $end produces a length larger than the indices suggest — confirm whether
     * negative $start with $end is meant to be supported.
     *
     * @param string $str
     * @param int    $start    <p>Initial index from which to begin extraction.</p>
     * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
     *                      characters long, <b>FALSE</b> will be returned.
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        // The literal default encoding goes straight to the mb_* functions;
        // anything else is normalized and routed through the class helpers.
        $use_native = $encoding === 'UTF-8';
        if (!$use_native) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // A non-negative end at or before the start selects nothing.
        if ($end !== null && $end >= 0 && $end <= $start) {
            return '';
        }

        if ($end === null || $end < 0) {
            // Both of these cases need the total character count.
            $total = $use_native
                ? (int) \mb_strlen($str)
                : (int) self::strlen($str, $encoding);

            $length = $end === null ? $total : $total + $end - $start;
        } else {
            $length = $end - $start;
        }

        if ($use_native) {
            return \mb_substr($str, $start, $length);
        }

        return self::substr($str, $start, $length, $encoding);
    }
8105
8106
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Dashes and whitespace become "_", every digit is wrapped in "_", every
     * upper-case letter is lower-cased and prefixed with "_"; runs of "_" are
     * collapsed and leading / trailing "_" are removed.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in snake_case.</p>
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // Numbers and upper-case letters only. Inside a character class "|"
            // is a literal pipe, not an alternation, so it must not be listed
            // here (otherwise every "|" in the input gets a "_" prefix).
            '/([\\p{N}\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $match_int = (int) $match;

                // ASCII digit: keep it and separate it on both sides.
                if ((string) $match_int === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        // The patterns are applied in order. A separate "trim whitespace"
        // pattern is unnecessary: once all whitespace has been turned into "_"
        // there is nothing left for it to match, and the final trim() calls
        // remove the resulting leading / trailing "_".
        $str = (string) \preg_replace(
            [
                '/\\s+/u', // convert spaces to "_"
                '/_+/',    // remove double "_"
            ],
            [
                '_',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
8175
8176
    /**
     * Reorder the characters of a string by their code points.
     *
     * EXAMPLE: <code>UTF8::str_sort('  -ABC-中文空白-  '); // '    ---ABC中文白空'</code>
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string of sorted characters.</p>
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $code_points = self::codepoints($str);

        if ($unique) {
            // Flipping twice drops duplicate values.
            $code_points = \array_flip(\array_flip($code_points));
        }

        if (!$desc) {
            \asort($code_points);
        } else {
            \arsort($code_points);
        }

        // Turn the ordered code points back into a string.
        return self::string($code_points);
    }
8206
8207
    /**
     * Convert each string of an array into an array of Unicode characters.
     *
     * EXAMPLE: <code>
     * UTF8::str_split_array(['中文空白', 'test'], 2); // [['中文', '空白'], ['te', 'st']]
     * </code>
     *
     * @param int[]|string[] $input                   <p>The string[] or int[] to split into array.</p>
     * @param int            $length                  [optional] <p>Max character length of each array
     *                                                element.</p>
     * @param bool           $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                                string.</p>
     * @param bool           $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                                "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[][]
     *                    <p>An array containing chunks of the input, with the keys of $input preserved.</p>
     */
    public static function str_split_array(
        array $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // array_map() with a single array keeps the original keys, so no
        // by-reference loop over the parameter is needed.
        /** @var string[][] $chunks */
        $chunks = \array_map(
            static function ($item) use ($length, $clean_utf8, $try_to_use_mb_functions): array {
                return self::str_split(
                    $item,
                    $length,
                    $clean_utf8,
                    $try_to_use_mb_functions
                );
            },
            $input
        );

        return $chunks;
    }
8245
8246
    /**
     * Convert a string to an array of unicode characters.
     *
     * EXAMPLE: <code>UTF8::str_split('中文空白'); // array('中', '文', '空', '白')</code>
     *
     * @param int|string $input                   <p>The string or int to split into array.</p>
     * @param int        $length                  [optional] <p>Max character length of each array
     *                                            element.</p>
     * @param bool       $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                            string.</p>
     * @param bool       $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                            "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array containing chunks of chars from the input. An empty array is returned
     *                  for an empty input or a $length of zero or less.</p>
     *
     * @noinspection SuspiciousBinaryOperationInspection
     * @noinspection OffsetOperationsInspection
     */
    public static function str_split(
        $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        // this is only an old fallback
        /** @noinspection PhpSillyAssignmentInspection - hack for phpstan */
        /** @var int|int[]|string|string[] $input */
        $input = $input;
        if (\is_array($input)) {
            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return self::str_split_array(
                $input,
                $length,
                $clean_utf8,
                $try_to_use_mb_functions
            );
        }

        // init
        $input = (string) $input;

        if ($input === '') {
            return [];
        }

        if ($clean_utf8) {
            $input = self::clean($input);
        }

        if (
            $try_to_use_mb_functions
            &&
            self::$SUPPORT['mbstring'] === true
        ) {
            // mb_str_split() already honours $length, so its result is returned directly.
            if (Bootup::is_php('7.4')) {
                /**
                 * @psalm-suppress ImpureFunctionCall - why?
                 */
                $return = \mb_str_split($input, $length);
                if ($return !== false) {
                    return $return;
                }
            }

            $i_max = \mb_strlen($input);
            if ($i_max <= 127) {
                // Short input: pick the characters one by one.
                $ret = [];
                for ($i = 0; $i < $i_max; ++$i) {
                    $ret[] = \mb_substr($input, $i, 1);
                }
            } else {
                // Longer input: let PCRE split it in a single pass.
                $return_array = [];
                \preg_match_all('/./us', $input, $return_array);
                $ret = $return_array[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $return_array = [];
            \preg_match_all('/./us', $input, $return_array);
            $ret = $return_array[0] ?? [];
        } else {
            // fallback: walk the bytes and group them by their UTF-8 lead byte;
            // a lead byte whose continuation bytes do not match is skipped

            $ret = [];
            $len = \strlen($input);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($input[$i] & "\x80") === "\x00") {
                    // 0xxxxxxx: single byte
                    $ret[] = $input[$i];
                } elseif (
                    isset($input[$i + 1])
                    &&
                    ($input[$i] & "\xE0") === "\xC0"
                ) {
                    // 110xxxxx: lead byte of a two-byte sequence
                    if (($input[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $input[$i] . $input[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($input[$i + 2])
                    &&
                    ($input[$i] & "\xF0") === "\xE0"
                ) {
                    // 1110xxxx: lead byte of a three-byte sequence
                    if (
                        ($input[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($input[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $input[$i] . $input[$i + 1] . $input[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($input[$i + 3])
                    &&
                    ($input[$i] & "\xF8") === "\xF0"
                ) {
                    // 11110xxx: lead byte of a four-byte sequence
                    if (
                        ($input[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($input[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($input[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $input[$i] . $input[$i + 1] . $input[$i + 2] . $input[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            $ret = \array_chunk($ret, $length);

            // The callback takes its argument by value: array_map() passes
            // values, and a by-reference parameter here raises a
            // "must be passed by reference" warning on PHP 8.
            return \array_map(
                static function (array $item): string {
                    return \implode('', $item);
                },
                $ret
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
8406
8407
    /**
     * Splits the string with the provided regular expression, returning an
     * array of strings. An optional integer $limit will truncate the
     * results.
     *
     * NOTE(review): with mbstring the pattern is handed to mb_split() as a
     * regular expression, while the fallback passes it through preg_quote()
     * and therefore treats it as a literal string — confirm which is intended.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of strings; an empty array if $limit is 0 or the split fails.</p>
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $result = \mb_split($pattern, $str);

            // mb_split() reports failure with false; mirror the fallback below
            // instead of violating the array return type.
            if ($result === false) {
                return [];
            }

            if ($limit > 0) {
                // Keep only the first $limit pieces; the remainder is dropped.
                return \array_slice($result, 0, $limit);
            }

            return $result;
        }

        // Ask for one extra piece so the unsplit remainder can be dropped afterwards.
        if ($limit > 0) {
            ++$limit;
        } else {
            $limit = -1;
        }

        $array = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);

        if ($array === false) {
            return [];
        }

        if ($limit > 0 && \count($array) === $limit) {
            \array_pop($array);
        }

        return $array;
    }
8471
8472
    /**
     * Check if the string starts with the given substring.
     *
     * The comparison is byte-wise and case-sensitive. An empty $needle always
     * matches.
     *
     * EXAMPLE: <code>
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'κόσμε'); // false
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // Compare only the prefix; strpos() would scan the whole haystack
        // before reporting that the needle is not at position 0.
        return \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
8499
8500
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool whether or not $str starts with $substring
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        // An empty input string or an empty candidate list can never match.
        if ($str === '' || $substrings === []) {
            return false;
        }

        // Iterate by value: the candidates are only read, so no reference
        // (and no dangling reference afterwards) is needed.
        foreach ($substrings as $substring) {
            if (self::str_starts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
8530
8531
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // Use the class helper (as the sibling *_separator methods do) rather
        // than calling mb_substr() directly with the caller-supplied encoding name.
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
8572
8573
    /**
     * Returns everything that follows the last occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        // The default encoding uses the mb_* functions directly, anything else the class helpers.
        $use_native = $encoding === 'UTF-8';

        $separator_pos = $use_native
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);

        if ($separator_pos === false) {
            return '';
        }

        if ($use_native) {
            $tail_start = $separator_pos + (int) \mb_strlen($separator);

            return (string) \mb_substr($str, $tail_start);
        }

        $tail_start = $separator_pos + (int) self::strlen($separator, $encoding);

        return (string) self::substr($str, $tail_start, null, $encoding);
    }
8614
8615
    /**
     * Returns everything that precedes the first occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        // The default encoding uses the mb_* functions directly, anything else the class helpers.
        $use_native = $encoding === 'UTF-8';

        $separator_pos = $use_native
            ? \mb_strpos($str, $separator)
            : self::strpos($str, $separator, 0, $encoding);

        if ($separator_pos === false) {
            return '';
        }

        // The separator position doubles as the length of the leading part.
        if ($use_native) {
            return (string) \mb_substr($str, 0, $separator_pos);
        }

        return (string) self::substr($str, 0, $separator_pos, $encoding);
    }
8660
8661
    /**
     * Returns everything that precedes the last occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        // The default encoding uses the mb_* functions directly, anything else the class helpers.
        $use_native = $encoding === 'UTF-8';

        $separator_pos = $use_native
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);

        if ($separator_pos === false) {
            return '';
        }

        // The separator position doubles as the length of the leading part.
        if ($use_native) {
            return (string) \mb_substr($str, 0, $separator_pos);
        }

        // The encoding name is normalized only for the final extraction.
        $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr($str, 0, $separator_pos, $normalized_encoding);
    }
8705
8706
    /**
     * Returns the part of the string starting at the first occurrence of
     * "$needle" (needle included), or — with "$before_needle" — the part in
     * front of that occurrence.
     *
     * An empty string is returned if $str or $needle is empty, or if the
     * needle does not occur in $str.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // Passing the flag straight through covers both the "before" and the
        // default case, since false is mb_strstr()'s own default.
        $found = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $before_needle)
            : self::strstr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
8752
8753
    /**
     * Returns the part of the string starting at the last occurrence of
     * "$needle" (needle included), or — with "$before_needle" — the part in
     * front of that occurrence.
     *
     * An empty string is returned if $str or $needle is empty, or if the
     * needle does not occur in $str.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // Passing the flag straight through covers both the "before" and the
        // default case, since false is mb_strrchr()'s own default.
        $found = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $before_needle)
            : self::strrchr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
8799
8800
    /**
     * Wraps $str in the given substring.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The input with $substring placed in front of and behind it.</p>
     */
    public static function str_surround(string $str, string $substring): string
    {
        return "{$substring}{$str}{$substring}";
    }
8815
8816
    /**
8817
     * Returns a trimmed string with the first letter of each word capitalized.
8818
     * Also accepts an array, $ignore, allowing you to list words not to be
8819
     * capitalized.
8820
     *
8821
     * @param string              $str
8822
     * @param array|string[]|null $ignore                        [optional] <p>An array of words not to capitalize or
8823
     *                                                           null. Default: null</p>
8824
     * @param string              $encoding                      [optional] <p>Default: 'UTF-8'</p>
8825
     * @param bool                $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the
8826
     *                                                           string.</p>
8827
     * @param string|null         $lang                          [optional] <p>Set the language for special cases: az,
8828
     *                                                           el, lt, tr</p>
8829
     * @param bool                $try_to_keep_the_string_length [optional] <p>true === try to keep the string length:
8830
     *                                                           e.g. ẞ -> ß</p>
8831
     * @param bool                $use_trim_first                [optional] <p>true === trim the input string,
8832
     *                                                           first</p>
8833
     * @param string|null         $word_define_chars             [optional] <p>A string of chars that will be used as
8834
     *                                                           whitespace separator === words.</p>
8835
     *
8836
     * @psalm-pure
8837
     *
8838
     * @return string
8839
     *                <p>The titleized string.</p>
8840
     *
8841
     * @noinspection PhpTooManyParametersInspection
8842
     */
8843
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false,
        bool $use_trim_first = true,
        string $word_define_chars = null
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($use_trim_first) {
            $str = \trim($str);
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // The plain "mb_*" fast path is only taken when neither a language specific
        // mapping nor the length-preserving mapping was requested.
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        // Compare against null / '' explicitly: a truthiness check would silently drop
        // the separator "0", because the string "0" is falsy in PHP.
        if ($word_define_chars !== null && $word_define_chars !== '') {
            $word_define_chars = \preg_quote($word_define_chars, '/');
        } else {
            $word_define_chars = '';
        }

        // A "word" is every run of chars that is neither whitespace nor one of the
        // extra separator chars; each word is re-cased on its own.
        $str = (string) \preg_replace_callback(
            '/([^\\s' . $word_define_chars . ']+)/u',
            static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
                // words from the ignore list are returned untouched (exact, case-sensitive match)
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                    return $match[0];
                }

                if ($use_mb_functions) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                               . \mb_strtolower(\mb_substr($match[0], 1));
                    }

                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
                }

                // slow path: lower-case the whole word, then upper-case its first char,
                // both with the requested language / length options
                return self::ucfirst(
                    self::strtolower(
                        $match[0],
                        $encoding,
                        false,
                        $lang,
                        $try_to_keep_the_string_length
                    ),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            },
            $str
        );

        return $str;
    }
8913
8914
    /**
     * Returns a trimmed string in human friendly title case.
     *
     * Small words ("a", "an", "the", "of", ...) are lower-cased unless they start or
     * end the title, a sub-sentence or an inserted sub-phrase, or are part of a
     * hyphenated compound. URLs, domains, e-mail addresses, file paths and words with
     * internal capitals (e.g. "iPhone") are preserved as they are.
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize; they are appended to the built-in
     *                         small-word list and inserted into the regular expressions as-is.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize_for_humans(
        string $str,
        array $ignore = [],
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        $str = \trim($str);

        // input without any lower-case char is lower-cased first, so that the
        // "word w/o internal caps" rule below can apply
        if (!self::has_lowercase($str)) {
            $str = self::strtolower($str, $encoding);
        }

        // regex alternation of all small words (built-in ones + caller supplied ones)
        $small_words_rx = \implode(
            '|',
            \array_merge(
                [
                    '(?<!q&)a',
                    'an',
                    'and',
                    'as',
                    'at(?!&t)',
                    'but',
                    'by',
                    'en',
                    'for',
                    'if',
                    'in',
                    'of',
                    'on',
                    'or',
                    'the',
                    'to',
                    'v[.]?',
                    'via',
                    'vs[.]?',
                ],
                $ignore
            )
        );
        $apostrophe_rx = '(?x: [\'’] [[:lower:]]* )?';

        /**
         * Upper-case the first char of capture group 1.
         *
         * @param string[] $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        $ucfirst_group_one = static function (array $matches) use ($encoding): string {
            return static::ucfirst($matches[1], $encoding);
        };

        /**
         * Keep capture group 1 and upper-case the first char of capture group 2.
         *
         * @param string[] $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        $ucfirst_group_two = static function (array $matches) use ($encoding): string {
            return $matches[1] . static::ucfirst($matches[2], $encoding);
        };

        // the main substitutions
        /** @noinspection RegExpDuplicateAlternationBranch - false-positive - https://youtrack.jetbrains.com/issue/WI-51002 */
        $str = (string) \preg_replace_callback(
            '~\\b (_*) (?:                                                           # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |                # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_rx . ' )  #    URL, domain, or email
                        |
                        ( (?i: ' . $small_words_rx . ' ) ' . $apostrophe_rx . ' )           # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                // groups 1 and 6 (leading / trailing underscores) are always preserved
                if ($matches[2]) {
                    // preserve URLs, domains, emails and file paths
                    return $matches[1] . $matches[2] . $matches[6];
                }

                if ($matches[3]) {
                    // lower-case small words
                    return $matches[1] . self::strtolower($matches[3], $encoding) . $matches[6];
                }

                if ($matches[4]) {
                    // capitalize word w/o internal caps
                    return $matches[1] . static::ucfirst($matches[4], $encoding) . $matches[6];
                }

                // preserve other kinds of word (iPhone)
                return $matches[1] . $matches[5] . $matches[6];
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $small_words_rx . ' ) \\b # ...followed by small word
                     ~uxi',
            $ucfirst_group_two,
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\\b ( ' . $small_words_rx . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
            $ucfirst_group_one,
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $small_words_rx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            $ucfirst_group_one,
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        // NOTE(review): the lookbehind below tests for the ellipsis char "…", although its
        // inline comment talks about a hyphen - confirm whether "(?<! -)" was intended.
        $str = (string) \preg_replace_callback(
            '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $small_words_rx . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
            $ucfirst_group_two,
            $str
        );

        return $str;
    }
9105
9106
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of the string are written as one big-endian bit sequence; leading
     * zero bits are dropped (an empty string or a string of NUL bytes results in '0').
     *
     * EXAMPLE: <code>UTF8::str_to_binary('😃'); // '11110000100111111001100010000011'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>false on error</p>
     */
    public static function str_to_binary(string $str)
    {
        /** @var array|false $bytes - needed for PhpStan (stubs error) */
        $bytes = \unpack('C*', $str);
        if ($bytes === false) {
            return false;
        }

        // Convert byte by byte: "base_convert()" works on floats internally and
        // loses precision as soon as the input is longer than a few bytes.
        $bits = '';
        foreach ($bytes as $byte) {
            $bits .= \str_pad(\decbin($byte), 8, '0', \STR_PAD_LEFT);
        }

        // keep the output format of the previous "base_convert()" based version:
        // no leading zeros, but at least one digit
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
9129
9130
    /**
     * Split a string into an array of lines.
     *
     * The string is split on every run of one or two "\r" / "\n" chars.
     *
     * @param string   $str
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
    {
        if ($str === '') {
            return $remove_empty_values ? [] : [''];
        }

        // NOTE(review): "[\r\n]{1,2}" also treats "\n\n" as ONE line break, so a single
        // blank line between two lines is swallowed - confirm that this is intended.
        /** @noinspection PhpComposerExtensionStubsInspection */
        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split("[\r\n]{1,2}", $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        if ($lines === false) {
            return $remove_empty_values ? [] : [''];
        }

        // nothing to filter -> hand the raw split result back
        $filter_requested = $remove_empty_values || $remove_short_values !== null;
        if (!$filter_requested) {
            return $lines;
        }

        return self::reduce_string_array($lines, $remove_empty_values, $remove_short_values);
    }
9170
9171
    /**
     * Convert a string into an array of words.
     *
     * Without any filter option the result alternates between "non-word" parts (even
     * keys) and words (odd keys), because the words are captured as split delimiters.
     *
     * EXAMPLE: <code>UTF8::str_to_words('中文空白 oöäü#s', '#') // array('', '中文空白', ' ', 'oöäü#s', '')</code>
     *
     * @param string   $str
     * @param string   $char_list           <p>Additional chars for the definition of "words".</p>
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $char_list = '',
        bool $remove_empty_values = false,
        int $remove_short_values = null
    ): array {
        if ($str === '') {
            return $remove_empty_values ? [] : [''];
        }

        $word_chars = self::rxClass($char_list, '\pL');

        // a word = word chars, optionally joined by a dash or an apostrophe
        $parts = \preg_split(
            "/({$word_chars}+(?:[\p{Pd}’']{$word_chars}+)*)/u",
            $str,
            -1,
            \PREG_SPLIT_DELIM_CAPTURE
        );
        if ($parts === false) {
            return $remove_empty_values ? [] : [''];
        }

        // nothing to filter -> hand the raw split result back
        if (!$remove_empty_values && $remove_short_values === null) {
            return $parts;
        }

        // make sure that every remaining value is a string
        return \array_map(
            static function ($part): string {
                return (string) $part;
            },
            self::reduce_string_array($parts, $remove_empty_values, $remove_short_values)
        );
    }
9222
9223
    /**
     * Alias for "UTF8::to_ascii()": all arguments are forwarded unchanged.
     *
     * @param string $str
     * @param string $unknown
     * @param bool   $strict
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::to_ascii()
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
     */
    public static function str_transliterate(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
9244
9245
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If the substring alone is at least as long as $length, nothing of the
     * original string is kept and only the substring is returned.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // short enough already -> nothing to truncate and nothing to append
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                // Reserve room for the suffix. Clamp at 0: a negative length would make
                // "mb_substr()" cut from the END of the string and the result would be
                // much longer than requested.
                $length = \max(0, $length - (int) \mb_strlen($substring));
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length) . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            // same clamping as in the UTF-8 fast path above
            $length = \max(0, $length - (int) self::strlen($substring, $encoding));
        }

        return (string) self::substr($str, 0, $length, $encoding) . $substring;
    }
9305
9306
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * @param string $str
     * @param int    $length                                 <p>Desired length of the truncated string.</p>
     * @param string $substring                              [optional] <p>The substring to append if it can fit.
     *                                                       Default:
     *                                                       ''</p>
     * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>true === a truncated part without any
     *                                                       space (one cut word) is kept instead of being
     *                                                       dropped. Default: false</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignore_do_not_split_words_for_one_word = false
    ): string {
        if ($length <= 0 || $str === '') {
            return $substring;
        }

        // The literal 'UTF-8' uses the native "mb_*" functions, every other encoding is
        // normalized first and handled by the helpers of this class.
        $native = $encoding === 'UTF-8';
        if (!$native) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str_length = $native ? (int) \mb_strlen($str) : (int) self::strlen($str, $encoding);
        if ($length >= $str_length) {
            return $str;
        }

        // need to further trim the string so we can append the substring
        $length -= $native ? (int) \mb_strlen($substring) : (int) self::strlen($substring, $encoding);
        if ($length <= 0) {
            return $substring;
        }

        /** @var false|string $truncated - needed for PhpStan (stubs error) */
        $truncated = $native
            ? \mb_substr($str, 0, $length)
            : self::substr($str, 0, $length, $encoding);
        if ($truncated === false) {
            return '';
        }

        // a space directly behind the cut means that no word was split
        $space_position = $native
            ? \mb_strpos($str, ' ', $length - 1)
            : self::strpos($str, ' ', $length - 1, $encoding);
        if ($space_position === $length) {
            return $truncated . $substring;
        }

        // the last word was truncated: find pos of the last occurrence of a space, get up to that
        $last_position = $native
            ? \mb_strrpos($truncated, ' ', 0)
            : self::strrpos($truncated, ' ', 0, $encoding);

        $keep_cut_word = $last_position === false
                         && ($space_position === false || $ignore_do_not_split_words_for_one_word);

        if (!$keep_cut_word) {
            $truncated = $native
                ? (string) \mb_substr($truncated, 0, (int) $last_position)
                : (string) self::substr($truncated, 0, (int) $last_position, $encoding);
        }

        return $truncated . $substring;
    }
9412
9413
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * Shortcut for "UTF8::str_delimit()" with "_" as delimiter.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The underscored string.</p>
     */
    public static function str_underscored(string $str): string
    {
        $underscored = self::str_delimit($str, '_');

        return $underscored;
    }
9430
9431
    /**
     * Returns an UpperCamelCase version of the supplied string. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * The string is camelized via "UTF8::str_camelize()" first, afterwards the
     * first char is upper-cased via "UTF8::ucfirst()".
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in UpperCamelCase.</p>
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // only the encoding is handed to the camelize step, the other options
        // are applied by the final "ucfirst()" call
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst(
            $camelized,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }
9458
9459
    /**
     * Alias for "UTF8::ucfirst()": all arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::ucfirst()
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
     */
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $upper_first = self::ucfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $upper_first;
    }
9490
9491
    /**
     * Get the number of words in a specific string.
     *
     * EXAMPLES: <code>
     * // format: 0 -> return only word count (int)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c'); // 4
     * UTF8::str_word_count('中文空白 öäü abc#c', 0, '#'); // 3
     *
     * // format: 1 -> return words (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 1); // array('中文空白', 'öäü', 'abc', 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 1, '#'); // array('中文空白', 'öäü', 'abc#c')
     *
     * // format: 2 -> return words with offset (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 2); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc', 13 => 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 2, '#'); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc#c')
     * </code>
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $format    [optional] <p>
     *                          <strong>0</strong> => return a number of words (default)<br>
     *                          <strong>1</strong> => return an array of words<br>
     *                          <strong>2</strong> => return an array of words with word-offset as key
     *                          </p>
     * @param string $char_list [optional] <p>Additional chars that belong to words and do not start a new word.</p>
     *
     * @psalm-pure
     *
     * @return int|string[]
     *                      <p>The number of words in the string, or the words themselves (format 1 and 2).</p>
     */
    public static function str_word_count(string $str, int $format = 0, string $char_list = '')
    {
        // the parts alternate: even keys hold the text between the words, odd keys the words
        $parts = self::str_to_words($str, $char_list);

        switch ($format) {
            case 1:
                $words = [];
                foreach ($parts as $index => $part) {
                    if ($index % 2 === 1) {
                        $words[] = $part;
                    }
                }

                return $words;

            case 2:
                $words_by_offset = [];
                $offset = 0;
                foreach ($parts as $index => $part) {
                    if ($index % 2 === 1) {
                        $words_by_offset[$offset] = $part;
                    }

                    // the offset of the next part is the summed-up length of all parts before it
                    $offset += (int) self::strlen($part);
                }

                return $words_by_offset;

            default:
                return (int) ((\count($parts) - 1) / 2);
        }
    }
9548
9549
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp()
     *
     * Both strings are case-folded via "UTF8::strtocasefold()" and then compared
     * via "UTF8::strcmp()".
     *
     * EXAMPLE: <code>UTF8::strcasecmp("iñtërnâtiôn\nàlizætiøn", "Iñtërnâtiôn\nàlizætiøn"); // 0</code>
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): int {
        $folded_first = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_second = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded_first, $folded_second);
    }
9591
9592
    /**
     * Alias for "UTF8::strstr()": all arguments are forwarded unchanged.
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $clean_utf8
     *
     * @psalm-pure
     *
     * @return false|string
     *
     * @see        UTF8::strstr()
     * @deprecated <p>please use "UTF8::strstr()"</p>
     */
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $found = self::strstr($haystack, $needle, $before_needle, $encoding, $clean_utf8);

        return $found;
    }
9623
9624
    /**
     * Case-sensitive string comparison.
     *
     * Both strings are normalized to NFD before they are compared byte-wise, so
     * canonically equivalent strings are equal. A string that can not be normalized
     * (e.g. invalid UTF-8) is compared as it is.
     *
     * EXAMPLE: <code>UTF8::strcmp("iñtërnâtiôn\nàlizætiøn", "iñtërnâtiôn\nàlizætiøn"); // 0</code>
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        // identical bytes are always equal, no normalization needed
        if ($str1 === $str2) {
            return 0;
        }

        // "Normalizer::normalize()" returns false on failure. Passing that on to "\strcmp()"
        // is a TypeError with strict types and otherwise compares an empty string, which
        // reports two different broken strings as equal -> fall back to the raw input.
        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        return \strcmp(
            \is_string($normalized1) ? $normalized1 : $str1,
            \is_string($normalized2) ? $normalized2 : $str2
        );
    }
9650
9651
    /**
     * Find length of initial segment not matching mask.
     *
     * @param string   $str       <p>The input string.</p>
     * @param string   $char_list <p>The chars that end the segment; with an empty list the whole string
     *                            length is returned.</p>
     * @param int|null $offset    [optional] <p>Start of the part of the string that is examined.</p>
     * @param int|null $length    [optional] <p>Length of the part of the string that is examined.</p>
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     */
    public static function strcspn(
        string $str,
        string $char_list,
        int $offset = null,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($char_list === '') {
            return (int) self::strlen($str, $encoding);
        }

        // restrict the search to the requested part of the string
        if ($offset !== null || $length !== null) {
            $start = (int) $offset;

            if ($encoding !== 'UTF-8') {
                /** @noinspection UnnecessaryCastingInspection */
                $part = self::substr($str, $start, $length, $encoding);
            } elseif ($length !== null) {
                /** @noinspection UnnecessaryCastingInspection */
                $part = \mb_substr($str, $start, $length);
            } else {
                /** @noinspection UnnecessaryCastingInspection */
                $part = \mb_substr($str, $start);
            }

            if ($part === false) {
                return 0;
            }

            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $str = $part;
        }

        if ($str === '') {
            return 0;
        }

        // lazily capture everything in front of the first char from the list
        $found = [];
        if (!\preg_match('/^(.*?)' . self::rxClass($char_list) . '/us', $str, $found)) {
            // no char of the list occurs at all -> the whole string is the segment
            return (int) self::strlen($str, $encoding);
        }

        $segment_length = self::strlen($found[1], $encoding);

        return $segment_length === false ? 0 : $segment_length;
    }
9717
9718
    /**
     * Deprecated alias of "UTF8::stristr()": all arguments are passed through unchanged.
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $clean_utf8
     *
     * @psalm-pure
     *
     * @return false|string
     *
     * @see        UTF8::stristr()
     * @deprecated <p>please use "UTF8::stristr()"</p>
     */
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        return self::stristr($haystack, $needle, $before_needle, $encoding, $clean_utf8);
    }
9749
9750
    /**
     * Create a UTF-8 string from code points.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * Every code point is written as a numeric HTML entity ("&#...;") and the
     * resulting string is decoded via UTF8::html_entity_decode().
     *
     * EXAMPLE: <code>UTF8::string(array(246, 228, 252)); // 'öäü'</code>
     *
     * @param int|int[]|string|string[] $intOrHex <p>
     *                                            Integer code points, or hexadecimal code points
     *                                            written with a "0x", "U+" or "\u" prefix.
     *                                            </p>
     *
     * @psalm-param int[]|string[]|int|string $intOrHex
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A UTF-8 encoded string.</p>
     */
    public static function string($intOrHex): string
    {
        if ($intOrHex === []) {
            return '';
        }

        if (!\is_array($intOrHex)) {
            $intOrHex = [$intOrHex];
        }

        $str = '';
        foreach ($intOrHex as $strPart) {
            // a plain "(int)" cast turns prefixed hex notations like "0xF6" into 0,
            // so hexadecimal input has to be detected and converted explicitly
            if (
                \is_string($strPart)
                &&
                \preg_match('/^(?:0x|U\+|\\\\u)([0-9a-f]{1,6})\z/i', $strPart, $hex_match)
            ) {
                $code_point = (int) \hexdec($hex_match[1]);
            } else {
                $code_point = (int) $strPart;
            }

            $str .= '&#' . $code_point . ';';
        }

        return self::html_entity_decode($str, \ENT_QUOTES | \ENT_HTML5);
    }
9783
9784
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * The string is tested against every BOM byte sequence that is a key of the internal BOM table.
     *
     * EXAMPLE: <code>UTF8::string_has_bom("\xef\xbb\xbf foobar"); // true</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     */
    public static function string_has_bom(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // only the keys (the BOM byte sequences) are needed; iterating by value avoids
        // creating a reference into the static table that would be left dangling afterwards
        foreach (\array_keys(self::$BOM) as $bom_string) {
            $bom_string = (string) $bom_string;

            // compare the prefix only, instead of searching the whole string for the BOM
            if (\strncmp($str, $bom_string, \strlen($bom_string)) === 0) {
                return true;
            }
        }

        return false;
    }
9808
9809
    /**
     * Strip HTML and PHP tags from a string, optionally cleaning invalid UTF-8 first.
     *
     * EXAMPLE: <code>UTF8::strip_tags("<span>κόσμε\xa0\xa1</span>"); // 'κόσμε'</code>
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string $str            <p>
     *                               The input string.
     *                               </p>
     * @param string $allowable_tags [optional] <p>
     *                               Tags which should not be stripped, e.g. "&lt;b&gt;&lt;i&gt;".
     *                               </p>
     *                               <p>
     *                               HTML comments and PHP tags are always stripped. This is hardcoded
     *                               in the native function and can not be changed with allowable_tags.
     *                               </p>
     * @param bool   $clean_utf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The stripped string.</p>
     */
    public static function strip_tags(
        string $str,
        string $allowable_tags = null,
        bool $clean_utf8 = false
    ): string {
        if ($str === '') {
            return '';
        }

        $input = $clean_utf8 ? self::clean($str) : $str;

        // call the native function without a second argument when no tags are whitelisted
        return $allowable_tags === null
            ? \strip_tags($input)
            : \strip_tags($input, $allowable_tags);
    }
9853
9854
    /**
     * Remove every whitespace character from the string. This covers tabs and
     * newlines, as well as multibyte whitespace such as the thin space
     * and the ideographic space.
     *
     * EXAMPLE: <code>UTF8::strip_whitespace('   Ο     συγγραφέας  '); // 'Οσυγγραφέας'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string without whitespace.</p>
     */
    public static function strip_whitespace(string $str): string
    {
        return $str === ''
            ? ''
            : (string) \preg_replace('/[[:space:]]+/u', '', $str);
    }
9875
9876
    /**
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
     *
     * INFO: use UTF8::stripos_in_byte() for the byte-length
     *
     * Backends are tried in this order: mbstring, intl ("grapheme_stripos()", UTF-8 and
     * non-negative offsets only), native "stripos()" for pure ASCII input, and finally
     * case-folding both strings and delegating to UTF8::strpos().
     *
     * EXAMPLE: <code>UTF8::stripos('aσσb', 'ΣΣ'); // 1</code> (σσ == ΣΣ)
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle     <p>The string to find in haystack.</p>
     * @param int    $offset     [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // invalid characters in front of the needle would shift
            // the position reported by "mb_strpos()" / "iconv_strpos()"
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stripos($haystack, $needle, $offset, $encoding);
            }

            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // "grapheme_stripos()" handles neither other encodings nor negative offsets
        if ($encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true) {
            $position = \grapheme_stripos($haystack, $needle, $offset);
            if ($position !== false) {
                return $position;
            }
        }

        // pure ASCII input: the native byte-based function is sufficient
        if (ASCII::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        // last resort: case-fold both sides and run a case-sensitive search
        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($folded_haystack, $folded_needle, $offset, $encoding);
    }
9957
9958
    /**
     * Returns all of haystack starting from and including the first occurrence of needle to the end.
     * The search is case-insensitive.
     *
     * EXAMPLE: <code>
     * $str = 'iñtërnâtiônàlizætiøn';
     * $search = 'NÂT';
     *
     * UTF8::stristr($str, $search)); // 'nâtiônàlizætiøn'
     * UTF8::stristr($str, $search, true)); // 'iñtër'
     * </code>
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found
     *                      (or if haystack or needle is passed in as an empty string).</p>
     */
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // The needle can only be empty here if the cleaning removed all of its bytes.
        // Compare strictly: a loose "!$needle" check also treats the needle "0" as empty
        // and would return the whole haystack instead of searching for "0".
        if ($needle === '') {
            return $haystack;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        // pure ASCII input: the native byte-based function is sufficient
        if (ASCII::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        // fallback via vanilla php: lazily capture everything in front of the first match
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // skip as many characters as were captured in front of the needle
        return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
    }
10055
10056
    /**
     * Get the string length, not the byte-length!
     *
     * INFO: use UTF8::strwidth() for the char-length
     *
     * Backends are tried in this order: mbstring, native "strlen()" for the
     * "CP850" / "ASCII" encodings, iconv, intl (UTF-8 only), native "strlen()"
     * for pure ASCII input, and finally counting the matches of a regular expression.
     *
     * EXAMPLE: <code>UTF8::strlen("Iñtërnâtiôn\xE9àlizætiøn")); // 20</code>
     *
     * @see http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str        <p>The string being checked for length.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
     *                   $encoding.
     *                   (One multi-byte character counted as +1).
     *                   <br>
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
     *                   chars.
     *                   </p>
     */
    public static function strlen(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
                return @\mb_strlen($str);
            }

            /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
            return @\mb_strlen($str, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strlen($str);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
            &&
            self::$SUPPORT['iconv'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $return_tmp = \iconv_strlen($str, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strlen()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strlen($str);
            // "grapheme_strlen()" signals a failure with null or false (depending on the
            // PHP version): accept only a real length, otherwise try the next fallback
            if (\is_int($return_tmp)) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php
        //

        // every match of "." is one character; nothing matches if $str is not valid UTF-8
        \preg_match_all('/./us', $str, $parts);

        $return_tmp = \count($parts[0]);
        if ($return_tmp === 0) {
            return false;
        }

        return $return_tmp;
    }
10186
10187
    /**
     * Get the length of a string in bytes.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The number of bytes.</p>
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // "mb_" is available whenever function overloading is active,
        // so count via mbstring with an 8-bit encoding in that case
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850')
            : \strlen($str);
    }
10209
10210
    /**
     * Case-insensitive string comparisons using a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcasecmp()
     *
     * Both strings are case-folded via UTF8::strtocasefold() and then compared with UTF8::strnatcmp().
     *
     * EXAMPLES: <code>
     * UTF8::strnatcasecmp('2', '10Hello WORLD 中文空白!'); // -1
     * UTF8::strcasecmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
     *
     * UTF8::strnatcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
     * UTF8::strcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
     * </code>
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded_str1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_str2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded_str1, $folded_str2);
    }
10241
10242
    /**
     * String comparisons using a "natural order" algorithm
     *
     * INFO: natural order version of UTF8::strcmp()
     *
     * Both strings are passed through UTF8::strtonatfold() before the native comparison.
     *
     * EXAMPLES: <code>
     * UTF8::strnatcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // -1
     * UTF8::strcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
     *
     * UTF8::strnatcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
     * UTF8::strcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
     * </code>
     *
     * @see http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        // identical strings need no folding at all
        if ($str1 === $str2) {
            return 0;
        }

        $folded_str1 = (string) self::strtonatfold($str1);
        $folded_str2 = (string) self::strtonatfold($str2);

        return \strnatcmp($folded_str1, $folded_str2);
    }
10278
10279
    /**
     * Case-insensitive string comparison of the first n characters.
     *
     * Both strings are case-folded via UTF8::strtocasefold() and then compared with UTF8::strncmp().
     *
     * EXAMPLE: <code>
     * UTF8::strncasecmp("iñtërnâtiôn\nàlizætiøn321", "iñtërnâtiôn\nàlizætiøn123", 5); // 0
     * </code>
     *
     * @see http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        $folded_str1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_str2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        // NOTE: as in the original call, $encoding is not handed on to "strncmp()"
        return self::strncmp($folded_str1, $folded_str2, $len);
    }
10312
10313
    /**
     * String comparison of the first n characters.
     *
     * Both strings are cut down to their first $len characters and then compared with UTF8::strcmp().
     *
     * EXAMPLE: <code>
     * UTF8::strncmp("Iñtërnâtiôn\nàlizætiøn321", "Iñtërnâtiôn\nàlizætiøn123", 5); // 0
     * </code>
     *
     * @see http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // other encodings go through the encoding-aware wrapper
        if ($encoding !== 'UTF-8') {
            $head1 = (string) self::substr($str1, 0, $len, $encoding);
            $head2 = (string) self::substr($str2, 0, $len, $encoding);

            return self::strcmp($head1, $head2);
        }

        $head1 = (string) \mb_substr($str1, 0, $len);
        $head2 = (string) \mb_substr($str2, 0, $len);

        return self::strcmp($head1, $head2);
    }
10354
10355
    /**
     * Search a string for any of a set of characters.
     *
     * EXAMPLE: <code>UTF8::strpbrk('-中文空白-', '白'); // '白-'</code>
     *
     * @see http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>This parameter is case-sensitive.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The string starting from the character found, or false if it is not found
     *                      (or if one of the arguments is an empty string).</p>
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        $found = [];
        if (!\preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $found)) {
            return false;
        }

        // locate the matched character on byte level and return the rest of the haystack
        $byte_offset = (int) \strpos($haystack, $found[0]);

        return \substr($haystack, $byte_offset);
    }
10382
10383
    /**
     * Find the position of the first occurrence of a substring in a string.
     *
     * INFO: use UTF8::strpos_in_byte() for the byte-length
     *
     * Backends are tried in this order: mbstring, native "strpos()" for the
     * "CP850" / "ASCII" encodings, intl (UTF-8 and non-negative offsets only),
     * iconv (non-negative offsets only), native "strpos()" for pure ASCII input,
     * and finally a search on a slice of the haystack.
     *
     * EXAMPLE: <code>UTF8::strpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 8</code>
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>
     *                               The search offset. If it is not specified, 0 is used.
     *                               A negative offset counts from the end of the haystack.
     *                               </p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string.<br> If needle is not found it returns false.
     */
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if (\is_int($needle)) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
                return @\mb_strpos($haystack, $needle, $offset);
            }

            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //
        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $return_tmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        if ($offset < 0) {
            // A negative offset counts from the end of the haystack: translate it into the
            // equivalent offset from the start, so that the returned position is absolute
            // (resetting it to 0 after slicing would report a position relative to the slice).
            $offset += (int) self::strlen($haystack, $encoding);
            if ($offset < 0) {
                // the offset is not contained in the string
                return false;
            }
        }

        $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystack_tmp === false) {
            $haystack_tmp = '';
        }
        $haystack = (string) $haystack_tmp;

        // byte position of the needle inside of the slice
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        if ($pos === 0) {
            return $offset;
        }

        // convert the byte position into a character position and add the skipped characters
        return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
    }
10542
10543
    /**
     * Byte-wise search for the first occurrence of a substring (no multibyte handling).
     *
     * @param string $haystack <p>
     *                         The string to search in.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to search for.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the first occurrence of needle in the
     *                   haystack string. Returns false if needle is not found or if
     *                   either argument is an empty string.</p>
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // When mbstring function overloading is in use, "mb_*" is available:
        // call it explicitly with the 8-bit "CP850" charset.
        $use_mb = self::$SUPPORT['mbstring_func_overload'] === true;

        return $use_mb
            ? \mb_strpos($haystack, $needle, $offset, 'CP850')
            : \strpos($haystack, $needle, $offset);
    }
10575
10576
    /**
     * Byte-wise, case-insensitive search for the first occurrence of a substring
     * (no multibyte handling).
     *
     * @param string $haystack <p>
     *                         The string to search in.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to search for.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the first occurrence of needle in the
     *                   haystack string. Returns false if needle is not found or if
     *                   either argument is an empty string.</p>
     */
    public static function stripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // When mbstring function overloading is in use, "mb_*" is available:
        // call it explicitly with the 8-bit "CP850" charset.
        $use_mb = self::$SUPPORT['mbstring_func_overload'] === true;

        return $use_mb
            ? \mb_stripos($haystack, $needle, $offset, 'CP850')
            : \stripos($haystack, $needle, $offset);
    }
10608
10609
    /**
     * Return the part of a string around the last occurrence of a needle.
     *
     * EXAMPLE: <code>UTF8::strrchr('κόσμεκόσμε-äöü', 'κόσμε'); // 'κόσμε-äöü'</code>
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Selects which portion of haystack is returned:
     *                              true returns everything from the beginning up to
     *                              the last occurrence of needle, false returns everything
     *                              from the last occurrence of needle to the end.
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack, or false if needle is not found
     *                      or if haystack / needle is an empty string.</p>
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        $is_single_byte = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($is_single_byte && !$before_needle) {
            return \strrchr($haystack, $needle);
        }

        if (
            self::$SUPPORT['mbstring'] === false
            &&
            $encoding !== 'UTF-8'
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv, otherwise via vanilla php
        //

        // both fallbacks search for the first character of the needle only
        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }
        $needle = (string) $first_char;

        $last_pos = self::$SUPPORT['iconv'] === true
            ? \iconv_strrpos($haystack, $needle, $encoding)
            : self::strrpos($haystack, $needle, 0, $encoding);
        if ($last_pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
10739
10740
    /**
     * Reverse the order of the characters in a string.
     *
     * EXAMPLE: <code>UTF8::strrev('κ-öäü'); // 'üäö-κ'</code>
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with characters in the reverse sequence.</p>
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // the string is passed through emoji_encode() before the reversal
        // and through emoji_decode() afterwards
        $str = self::emoji_encode($str, true);
        $result = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            for ($idx = (int) self::strlen($str, $encoding) - 1; $idx >= 0; --$idx) {
                $piece = self::substr($str, $idx, 1, $encoding);
                if ($piece !== false) {
                    $result .= $piece;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($idx = (int) \grapheme_strlen($str) - 1; $idx >= 0; --$idx) {
                $piece = \grapheme_substr($str, $idx, 1);
                if ($piece !== false) {
                    $result .= $piece;
                }
            }
        } else {
            for ($idx = (int) \mb_strlen($str) - 1; $idx >= 0; --$idx) {
                $piece = \mb_substr($str, $idx, 1);
                if ($piece !== false) {
                    $result .= $piece;
                }
            }
        }

        return self::emoji_decode($result, true);
    }
10797
10798
    /**
     * Return the part of a string around the last occurrence of a needle, case-insensitive.
     *
     * EXAMPLE: <code>UTF8::strrichr('Aκόσμεκόσμε-äöü', 'aκόσμε'); // 'Aκόσμεκόσμε-äöü'</code>
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Selects which portion of haystack is returned:
     *                              true returns everything from the beginning up to
     *                              the last occurrence of needle, false returns everything
     *                              from the last occurrence of needle to the end.
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack, or<br>false if needle is not found
     *                      or if haystack / needle is an empty string.</p>
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // the fallback searches for the first character of the needle only
        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }
        $needle = (string) $first_char;

        $last_pos = self::strripos($haystack, $needle, 0, $encoding);
        if ($last_pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
10878
10879
    /**
     * Find the position of the last occurrence of a substring in a string, case-insensitive.
     *
     * EXAMPLE: <code>UTF8::strripos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * @param string     $haystack   <p>The string to look in.</p>
     * @param int|string $needle     <p>The string to look for. A non-negative int is converted
     *                               via UTF8::chr() first.</p>
     * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, or haystack / needle is empty, it returns false.</p>
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        $needle = (string) (
            (int) $needle === $needle && $needle >= 0
                ? self::chr($needle)
                : $needle
        );

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strripos() && iconv_strripos() are not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'ASCII' || $encoding === 'CP850') {
            return \strripos($haystack, $needle, $offset);
        }

        if (
            self::$SUPPORT['mbstring'] === false
            &&
            $encoding !== 'UTF-8'
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        $intl_usable = self::$SUPPORT['intl'] === true
            && $offset >= 0 // grapheme_strripos() can't handle negative offset
            && $encoding === 'UTF-8'; // INFO: "grapheme_strripos()" can't handle other encodings
        if ($intl_usable) {
            $intl_pos = \grapheme_strripos($haystack, $needle, $offset);
            if ($intl_pos !== false) {
                return $intl_pos;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both strings, then search case-sensitively
        //

        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($folded_haystack, $folded_needle, $offset, $encoding, $clean_utf8);
    }
10996
10997
    /**
     * Byte-wise, case-insensitive search for the last occurrence of a substring
     * (no multibyte handling).
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found or if either
     *                   argument is an empty string.</p>
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // When mbstring function overloading is in use, "mb_*" is available:
        // call it explicitly with the 8-bit "CP850" charset.
        $use_mb = self::$SUPPORT['mbstring_func_overload'] === true;

        return $use_mb
            ? \mb_strripos($haystack, $needle, $offset, 'CP850')
            : \strripos($haystack, $needle, $offset);
    }
11031
11032
    /**
     * Find the position of the last occurrence of a substring in a string.
     *
     * EXAMPLE: <code>UTF8::strrpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack   <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                               into the string. Negative values will stop searching at an arbitrary point prior to
     *                               the end of the string.
     *                               </p>
     * @param string     $encoding   [optional] <p>Set the charset.</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, or haystack / needle is empty, it returns false.</p>
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strrpos && iconv_strrpos are not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strrpos($haystack, $needle, $offset);
            }

            return \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strrpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $offset >= 0 // grapheme_strrpos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strrpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Cut the haystack down to the searched window first. The requested
        // $encoding is passed through so that the character offsets are
        // interpreted in the same charset as in the other code paths
        // (mirrors the vanilla fallback of UTF8::strpos()).
        $haystack_tmp = null;
        if ($offset > 0) {
            $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        } elseif ($offset < 0) {
            $haystack_tmp = self::substr($haystack, 0, $offset, $encoding);
            $offset = 0;
        }

        if ($haystack_tmp !== null) {
            if ($haystack_tmp === false) {
                $haystack_tmp = '';
            }
            $haystack = (string) $haystack_tmp;
        }

        // byte position of the last match inside the (possibly shortened) haystack
        $pos = \strrpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        /** @var false|string $str_tmp - needed for PhpStan (stubs error) */
        $str_tmp = \substr($haystack, 0, $pos);
        if ($str_tmp === false) {
            return false;
        }

        // convert the byte position into a character position in $encoding
        // and shift it by the characters skipped at the beginning
        return $offset + (int) self::strlen($str_tmp, $encoding);
    }
11177
11178
    /**
     * Byte-wise search for the last occurrence of a substring (no multibyte handling).
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the last occurrence of needle in the
     *                   haystack string. Returns false if needle is not found or if
     *                   either argument is an empty string.</p>
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // When mbstring function overloading is in use, "mb_*" is available:
        // call it explicitly with the 8-bit "CP850" charset.
        $use_mb = self::$SUPPORT['mbstring_func_overload'] === true;

        return $use_mb
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
            : \strrpos($haystack, $needle, $offset);
    }
11212
11213
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
     *
     * EXAMPLE: <code>UTF8::strspn('iñtërnâtiônàlizætiøn', 'itñ'); // 3</code>
     *
     * @param string $str      <p>The input string.</p>
     * @param string $mask     <p>The mask of chars</p>
     * @param int    $offset   [optional] <p>Start of the slice of $str that is examined.</p>
     * @param int    $length   [optional] <p>Length of the slice of $str that is examined.</p>
     * @param string $encoding [optional] <p>Set the charset.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>Length of the leading segment made up of mask characters;
     *                   0 if the (sliced) string or the mask is empty, or nothing matches.</p>
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // reduce the input to the requested slice before matching
        if ($offset || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $str = (string) \mb_substr($str, $offset, $length);
            } else {
                $str = (string) \mb_substr($str, $offset);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        // match the longest run of mask characters anchored at the start
        $found = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
11260
11261
    /**
11262
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
11263
     *
11264
     * EXAMPLE: <code>
11265
     * $str = 'iñtërnâtiônàlizætiøn';
11266
     * $search = 'nât';
11267
     *
11268
     * UTF8::strstr($str, $search)); // 'nâtiônàlizætiøn'
11269
     * UTF8::strstr($str, $search, true)); // 'iñtër'
11270
     * </code>
11271
     *
11272
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
11273
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
11274
     * @param bool   $before_needle [optional] <p>
11275
     *                              If <b>TRUE</b>, strstr() returns the part of the
11276
     *                              haystack before the first occurrence of the needle (excluding the needle).
11277
     *                              </p>
11278
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
11279
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
11280
     *
11281
     * @psalm-pure
11282
     *
11283
     * @return false|string
11284
     *                      A sub-string,<br>or <strong>false</strong> if needle is not found
11285
     */
11286
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // nothing can be found in (or with) an empty string
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($clean_utf8) {
            // invalid byte sequences in front of the needle make the
            // multibyte search functions report wrong positions
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }
        $is_utf8 = ($encoding === 'UTF-8');

        // 1. preferred implementation: mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $is_utf8
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        // 2. single-byte charsets can be handled by the native byte function
        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strstr($haystack, $needle, $before_needle);
        }

        // every remaining fallback assumes UTF-8, so warn about other charsets
        if (!$is_utf8 && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // 3. intl ("grapheme_strstr()" can't handle other encodings than UTF-8);
        //    a miss is not trusted and is re-checked by the fallbacks below
        if ($is_utf8 && self::$SUPPORT['intl'] === true) {
            $found = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($found !== false) {
                return $found;
            }
        }

        // 4. pure ASCII input: bytes and characters are the same thing
        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        // 5. vanilla php: lazily capture everything in front of the first needle
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $parts);

        if (!isset($parts[1])) {
            return false;
        }

        // $parts[1] is the text before the needle; the remainder starts right after it
        return $before_needle
            ? $parts[1]
            : self::substr($haystack, (int) self::strlen($parts[1]));
    }
11382
11383
    /**
     * Byte-wise search for the first occurrence of a string within another.
     *
     * @param string $haystack      <p>The string to search in.</p>
     * @param string $needle        <p>The string to search for.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Selects which portion of the haystack is returned:<br>
     *                              <b>true</b>: everything from the beginning of the haystack
     *                              up to the first occurrence of the needle<br>
     *                              <b>false</b>: everything from the first occurrence of the
     *                              needle to the end of the haystack
     *                              </p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The selected portion of the haystack, or <b>false</b> if the
     *                      haystack or the needle is empty or the needle is not found.</p>
     */
    public static function strstr_in_byte(
        string $haystack,
        string $needle,
        bool $before_needle = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strstr($haystack, $needle, $before_needle);
        }

        // "mb_" is available whenever the overload is used, so call it
        // explicitly with a single-byte charset
        return \mb_strstr($haystack, $needle, $before_needle, 'CP850'); // 8-BIT
    }
11424
11425
    /**
     * Unicode transformation for case-less matching.
     *
     * EXAMPLE: <code>UTF8::strtocasefold('ǰ◌̱'); // 'ǰ◌̱'</code>
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str        <p>The input string.</p>
     * @param bool        $full       [optional] <p>
     *                                <b>true</b>, replace full case folding chars (default)<br>
     *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                                </p>
     * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding   [optional] <p>Set the charset.</p>
     * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower      [optional] <p>Fold to lowercase (default), otherwise fold to uppercase.
     *                                PS: uppercase is for some languages better ...</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The case-folded string, or an empty string for empty input.</p>
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $clean_utf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        bool $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // strip invalid byte sequences before any case mapping is applied
            $str = self::clean($str);
        }

        // replace the special case-folding characters first ...
        $str = self::fixStrCaseHelper($str, $lower, $full);

        // ... then apply the regular case conversion; plain UTF-8 without a
        // language hint can go straight to "mb_"
        $is_plain_utf8 = $lang === null && $encoding === 'UTF-8';
        if ($is_plain_utf8) {
            return $lower ? \mb_strtolower($str) : \mb_strtoupper($str);
        }

        // the string was already cleaned above (if requested), so don't clean again
        return $lower
            ? self::strtolower($str, $encoding, false, $lang)
            : self::strtoupper($str, $encoding, false, $lang);
    }
11481
11482
    /**
     * Make a string lowercase.
     *
     * EXAMPLE: <code>UTF8::strtolower('DÉJÀ Σσς Iıİi'); // 'déjà σσς iıii'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                           <p>The string being lowercased (cast to string internally).</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr (needs "intl", otherwise a warning is triggered and the
     *                                                   parameter is ignored)</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $str = (string) $str;
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // strip invalid byte sequences before the case conversion
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // fast path: plain UTF-8 without a language hint
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // no language hint: "mb_" (or the symfony polyfill) does the job
        if ($lang === null) {
            return \mb_strtolower($str, $encoding);
        }

        // language-specific rules are only available via the intl transliterator
        if (self::$SUPPORT['intl'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            return \mb_strtolower($str, $encoding);
        }

        // lazy-load the list of known transliterator ids
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $transliterator_id = $lang . '-Lower';
        if (!\in_array($transliterator_id, self::$INTL_TRANSLITERATOR_LIST, true)) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

            // unknown language: use the language-independent rules instead
            $transliterator_id = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        return (string) \transliterator_transliterate($transliterator_id, $str);
    }
11563
11564
    /**
     * Make a string uppercase.
     *
     * EXAMPLE: <code>UTF8::strtoupper('Déjà Σσς Iıİi'); // 'DÉJÀ ΣΣΣ IIİI'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                           <p>The string being uppercased (cast to string internally).</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr (needs "intl", otherwise a warning is triggered and the
     *                                                   parameter is ignored)</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // strip invalid byte sequences before the case conversion
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str);
        }

        // fast path: plain UTF-8 without a language hint
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // lazy-load the list of known transliterator ids
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Upper';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl IS available here, only the requested language is unknown
                    /**
                     * @psalm-suppress ImpureFunctionCall - is is only a warning
                     */
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    // unknown language: use the language-independent rules instead
                    $language_code = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($language_code, $str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
11645
11646
    /**
     * Translate characters or replace sub-strings.
     *
     * EXAMPLE:
     * <code>
     * $array = [
     *     'Hello'   => '○●◎',
     *     '中文空白' => 'earth',
     * ];
     * UTF8::strtr('Hello 中文空白', $array); // '○●◎ earth'
     * </code>
     *
     * @see http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>
     *                              The characters (string) or sub-strings (array) to replace. If "to" is
     *                              omitted, an array is used as a [search => replacement] map and a string
     *                              is simply removed from the input.
     *                              </p>
     * @param string|string[] $to   [optional] <p>
     *                              The replacements for "from"; a string is split into single characters.
     *                              Surplus entries of the longer side are ignored.
     *                              </p>
     *
     * @throws \InvalidArgumentException if "from" and "to" cannot be combined into replacement pairs
     *
     * @psalm-pure
     *
     * @return string
     *                <p>This function returns a copy of str, translating all occurrences of each character in "from"
     *                to the corresponding character in "to".</p>
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // identical search and replacement: nothing would change
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            // two-argument form: pair every "from" entry with its "to" entry
            if (!\is_array($from)) {
                $from = self::str_split($from);
            }

            if (!\is_array($to)) {
                $to = self::str_split($to);
            }

            $count_from = \count($from);
            $count_to = \count($to);

            // entries without a counterpart on the other side are dropped
            if ($count_from > $count_to) {
                $from = \array_slice($from, 0, $count_to);
            } elseif ($count_from < $count_to) {
                $to = \array_slice($to, 0, $count_from);
            }

            // keep the result separate from $from, so that the inputs are
            // still available for the error message below
            $replace_pairs = \array_combine($from, $to);
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            if ($replace_pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from, true) . ' | to: ' . \print_r($to, true) . ')');
            }

            return \strtr($str, $replace_pairs);
        }

        if (\is_string($from)) {
            // "to" is the empty string here, so every occurrence of "from" is removed
            return \str_replace($from, $to, $str);
        }

        // "from" is already a [search => replacement] map
        return \strtr($str, $from);
    }
11713
11714
    /**
     * Return the width of a string.
     *
     * INFO: use UTF8::strlen() for the byte-length
     *
     * EXAMPLE: <code>UTF8::strwidth("Iñtërnâtiôn\xE9àlizætiøn")); // 21</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The width of the string; 0 for an empty string.</p>
     */
    public static function strwidth(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): int {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            $str = self::clean($str);
        }

        // preferred implementation: mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        // vanilla php fallback: the regex below works on UTF-8 only
        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // remove every character from the listed ranges and let preg_replace()
        // report how many were removed; each of them counts as two columns
        $wide_chars = 0;
        $remaining = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wide_chars);

        // all remaining characters count as one column each
        return (2 * $wide_chars) + (int) self::strlen($remaining);
    }
11773
11774
    /**
     * Get part of a string.
     *
     * EXAMPLE: <code>UTF8::substr('中文空白', 1, 2); // '文空'</code>
     *
     * @see http://php.net/manual/en/function.mb-substr.php
     *
     * @param string $str        <p>The string being checked.</p>
     * @param int    $offset     <p>The first position used in str.</p>
     * @param int    $length     [optional] <p>The maximum length of the returned string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters.</p><p>An empty string is returned for an empty
     *                      <i>str</i> or a <i>length</i> of 0. <b>FALSE</b> is returned if the
     *                      string length cannot be determined in the fallback path
     *                      (e.g.: invalid chars + mbstring not installed).
     */
    public static function substr(
        string $str,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fast path via mbstring (UTF-8)
        //

        if (self::$SUPPORT['mbstring'] === true && $encoding === 'UTF-8') {
            if ($length === null) {
                return \mb_substr($str, $offset);
            }

            return \mb_substr($str, $offset, $length);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            if ($length === null) {
                return \substr($str, $offset);
            }

            return \substr($str, $offset, $length);
        }

        // otherwise we need the string-length
        $str_length = 0;
        if ($offset || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // empty string ($length can't be 0 here, see the first check)
        if ($offset === $str_length && $length === null) {
            return '';
        }

        // impossible
        if ($offset && $offset > $str_length) {
            return '';
        }

        // "no length" means "up to the end of the string"
        if ($length === null) {
            $length = (int) $str_length;
        }

        //
        // fallback via mbstring (all other encodings)
        //

        // none of the fallbacks below passes $encoding on (they assume UTF-8 or
        // the iconv default), so mbstring is the only one that can honour it
        if (self::$SUPPORT['mbstring'] === true) {
            return \mb_substr($str, $offset, $length, $encoding);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_substr($str, $offset, $length);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $return_tmp = \iconv_substr($str, $offset, $length);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split to array, and remove invalid characters
        $array = self::str_split($str);

        // extract relevant part, and join to make string again
        return \implode('', \array_slice($array, $offset, $length));
    }
11937
11938
    /**
     * Binary-safe comparison of two strings from an offset, up to a length of characters.
     *
     * EXAMPLE: <code>
     * UTF8::substr_compare("○●◎\r", '●◎', 0, 2); // -1
     * UTF8::substr_compare("○●◎\r", '◎●', 1, 2); // 1
     * UTF8::substr_compare("○●◎\r", '●◎', 1, 2); // 0
     * </code>
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. If an offset or a length is
     *                                     given, str2 is cut to the length of the extracted part of str1.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        // without offset and length the strings are compared as they are
        $needs_cut = $offset !== 0 || $length !== null;

        if ($needs_cut && $encoding === 'UTF-8') {
            $str1 = $length === null
                ? (string) \mb_substr($str1, $offset)
                : (string) \mb_substr($str1, $offset, $length);

            // compare against an equally long prefix of the second string
            $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
        } elseif ($needs_cut) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str1 = (string) self::substr($str1, $offset, $length, $encoding);

            // compare against an equally long prefix of the second string
            $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1), $encoding);
        }

        return $case_insensitivity
            ? self::strcasecmp($str1, $str2, $encoding)
            : self::strcmp($str1, $str2);
    }
11999
12000
    /**
     * Count the number of substring occurrences.
     *
     * EXAMPLE: <code>UTF8::substr_count('中文空白', '文空', 1, 2); // 1</code>
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string $haystack   <p>The string to search in.</p>
     * @param string $needle     <p>The substring to search for.</p>
     * @param int    $offset     [optional] <p>The offset where to start counting.</p>
     * @param int    $length     [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring.
     *                           </p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of occurrences, or <b>false</b> if the haystack or the needle is empty
     *                   (or if the length of the haystack cannot be determined).</p>
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // an empty search window cannot contain anything
        if ($length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // invalid byte sequences would distort the character positions
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // narrow the haystack down to the requested window first
        if ($offset || $length > 0) {
            if ($length === null) {
                $haystack_length = self::strlen($haystack, $encoding);
                if ($haystack_length === false) {
                    return false;
                }

                $length = (int) $haystack_length;
            }

            $haystack = $encoding === 'UTF-8'
                ? (string) \mb_substr($haystack, $offset, $length)
                : (string) \mb_substr($haystack, $offset, $length, $encoding);
        }

        // preferred implementation: mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_substr_count($haystack, $needle)
                : \mb_substr_count($haystack, $needle, $encoding);
        }

        // the regex fallback below works on UTF-8 only
        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // vanilla php fallback: every regex match is one occurrence
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $occurrences, \PREG_SET_ORDER);

        return \count($occurrences);
    }
12089
12090
    /**
     * Count the number of substring occurrences, working on bytes instead of characters.
     *
     * @param string   $haystack <p>The string being checked.</p>
     * @param string   $needle   <p>The string being found.</p>
     * @param int      $offset   [optional] <p>The offset where to start counting.</p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring; null means that no explicit limit is applied.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of times the needle substring occurs in the haystack string;
     *                   0 if the haystack or the needle is empty.</p>
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        $is_overloaded = self::$SUPPORT['mbstring_func_overload'] === true;

        // without function overloading the native function is used directly
        if (!$is_overloaded) {
            return $length === null
                ? \substr_count($haystack, $needle, $offset)
                : \substr_count($haystack, $needle, $offset, $length);
        }

        // with function overloading: cut the requested window out of the haystack first
        if ($offset || $length !== null) {
            if ($length === null) {
                $haystack_length = self::strlen($haystack);
                if ($haystack_length === false) {
                    return false;
                }
                $length = (int) $haystack_length;
            }

            $window_is_empty_or_reversed = $length !== 0 && $offset !== 0 && ($length + $offset) <= 0;
            if (
                $window_is_empty_or_reversed
                &&
                !Bootup::is_php('7.1') // output from "substr_count()" have changed in PHP 7.1
            ) {
                return false;
            }

            /** @var false|string $window - needed for PhpStan (stubs error) */
            $window = \substr($haystack, $offset, $length);
            $haystack = $window === false ? '' : (string) $window;
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
    }
12171
12172
    /**
     * Count how often $substring occurs in $str.
     *
     * The comparison is case-sensitive by default; pass false as $case_sensitive
     * to compare both strings case-insensitively.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $substring      <p>The substring to search for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The number of occurrences; 0 if one of the strings is empty.</p>
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $case_sensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($str === '' || $substring === '') {
            return 0;
        }

        // decided before the encoding gets normalized, the exact value 'UTF-8' selects the fast path
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (!$case_sensitive) {
            // bring both strings into a common case before counting
            if ($is_utf8) {
                $str = \mb_strtoupper($str);
                $substring = \mb_strtoupper($substring);
            } else {
                $str = self::strtocasefold($str, true, false, $encoding, null, false);
                $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
            }
        }

        if ($is_utf8) {
            return (int) \mb_substr_count($str, $substring);
        }

        return (int) \mb_substr_count($str, $substring, $encoding);
    }
12219
12220
    /**
     * Strip $needle from the start of $haystack, ignoring the case while comparing.
     *
     * EXAMPLE: <code>
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'κόσμε'); // 'MiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it does not start with it.</p>
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // an empty haystack stays empty, an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_istarts_with($haystack, $needle)) {
            return $haystack;
        }

        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
12252
12253
    /**
     * Get a part of a string, addressed in bytes.
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first position used in str.</p>
     * @param int|null $length [optional] <p>The maximum length of the returned string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by <i>offset</i> and <i>length</i>.
     *                      Whatever the underlying "substr()" / "mb_substr()" call returns is passed
     *                      through, which presumably includes <b>FALSE</b> for an out-of-range offset
     *                      on older PHP versions.</p>
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // nothing to cut, or nothing requested
        if ($str === '' || $length === 0) {
            return '';
        }

        // no offset and no limit: the whole string is requested
        if ($offset === 0 && $length === null) {
            return $str;
        }

        $is_overloaded = self::$SUPPORT['mbstring_func_overload'] === true;
        if ($is_overloaded) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        // a missing length is replaced by a large upper bound
        $max_length = $length ?? 2147483647;

        return \substr($str, $offset, $max_length);
    }
12286
12287
    /**
     * Strip $needle from the end of $haystack, ignoring the case while comparing.
     *
     * EXAMPLE: <code>
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddle'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it does not end with it.</p>
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // an empty haystack stays empty, an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_iends_with($haystack, $needle)) {
            return $haystack;
        }

        $haystack_length = (int) self::strlen($haystack);
        $suffix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $haystack_length - $suffix_length);
    }
12319
12320
    /**
     * Strip $needle from the start of $haystack (case-sensitive).
     *
     * EXAMPLE: <code>
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'κόσμε'); // 'ΚόσμεMiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it does not start with it.</p>
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // an empty haystack stays empty, an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_starts_with($haystack, $needle)) {
            return $haystack;
        }

        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
12352
12353
    /**
     * Replace text within a portion of a string.
     *
     * EXAMPLE: <code>UTF8::substr_replace(array('Iñtërnâtiônàlizætiøn', 'foo'), 'æ', 1); // array('Iæñtërnâtiônàlizætiøn', 'fæoo')</code>
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given and $str is a string, the replacing
     *                                     ends at the end of the string. If it is not given and $str is an array,
     *                                     a length of zero is used for every element, which inserts the
     *                                     replacement at the given offset (see the example above).</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function; it is applied
     *                                     to every element if $str is an array.</p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if $offset or $length is an array while $str is not
     *
     * @return string|string[]
     *                         <p>The result string is returned. If string is an array then array is returned.</p>
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str)) {
            $num = \count($str);

            // the replacement
            if (\is_array($replacement)) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset
            if (\is_array($offset)) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$value_tmp) {
                    // non-integer offsets fall back to 0
                    $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
                }
                unset($value_tmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length)) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$value_tmp_V2) {
                    // non-integer lengths fall back to the number of input strings
                    $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
                }
                unset($value_tmp_V2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call, one per element; the closure hands the caller's encoding down,
            // a plain callable would silently reset it to the default
            return \array_map(
                static function ($str_single, $replacement_single, $offset_single, $length_single) use ($encoding) {
                    return self::substr_replace(
                        $str_single,
                        $replacement_single,
                        $offset_single,
                        $length_single,
                        $encoding
                    );
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        // a replacement array for a single string: only its first element is used
        if (\is_array($replacement)) {
            if ($replacement !== []) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length)) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset)) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into the range [0, string length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // translate a negative length into a positive one, cap everything else
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // head + replacement + tail
            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into single characters and splice the arrays
        \preg_match_all('/./us', $str, $str_matches);
        \preg_match_all('/./us', $replacement, $replacement_matches);

        if ($length === null) {
            $length_tmp = self::strlen($str, $encoding);
            if ($length_tmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $length_tmp;
        }

        \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);

        return \implode('', $str_matches[0]);
    }
12508
12509
    /**
     * Strip $needle from the end of $haystack (case-sensitive).
     *
     * EXAMPLE: <code>
     * UTF8::substr_right('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_right('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddleΚόσμε'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it does not end with it.</p>
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // an empty haystack stays empty, an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // the suffix check itself is done on the raw bytes
        $ends_with_needle = \substr($haystack, -\strlen($needle)) === $needle;
        if (!$ends_with_needle) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            $keep = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keep);
        }

        $keep = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $keep, $encoding);
    }
12558
12559
    /**
     * Returns a case swapped version of the string.
     *
     * EXAMPLE: <code>UTF8::swapCase('déJÀ σσς iıII'); // 'DÉjà ΣΣΣ IIii'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Each character's case swapped.</p>
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            $lower = \mb_strtolower($str);
            $upper = \mb_strtoupper($str);
        } else {
            $lower = self::strtolower($str, $encoding);
            $upper = self::strtoupper($str, $encoding);
        }

        // Byte-wise XOR of the three strings: where the input equals its lower-case form the
        // upper-case bytes remain, and the other way around.
        // NOTE(review): this presumably relies on the lower- and upper-case variants having the
        // same byte layout as the input - confirm for characters whose case mapping changes the length.
        return (string) ($lower ^ $upper ^ $str);
    }
12591
12592
    /**
     * Checks whether symfony-polyfills are used.
     *
     * A polyfill is assumed as soon as "mb_strlen()" or "iconv()" exists
     * although the matching extension ("mbstring" / "iconv") is not loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <strong>true</strong> if in use, <strong>false</strong> otherwise
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstring_is_polyfilled = !\extension_loaded('mbstring') && \function_exists('mb_strlen');
        $iconv_is_polyfilled = !\extension_loaded('iconv') && \function_exists('iconv');

        return $mbstring_is_polyfilled || $iconv_is_polyfilled;
    }
12617
12618
    /**
     * Replace every tab character in the string with a run of spaces.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length [optional] <p>The number of spaces per tab. A value of zero removes the
     *                           tabs, a negative value is treated like zero. Default: 4</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with all tabs replaced.</p>
     */
    public static function tabs_to_spaces(string $str, int $tab_length = 4): string
    {
        // "str_repeat()" rejects a negative multiplier (warning before PHP 8, ValueError since),
        // so a negative width is clamped to zero instead of being passed through
        $spaces = \str_repeat(' ', \max(0, $tab_length));

        return \str_replace("\t", $spaces, $str);
    }
12638
12639
    /**
     * Converts the first character of each word in the string to uppercase
     * and all other chars to lowercase.
     *
     * Without a language and without the "keep the string length" option the work is done by
     * "mb_convert_case()", otherwise it is handed over to "UTF8::str_titleize()".
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with all characters of $str being title-cased.</p>
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // language specific handling or length preservation needs the extended implementation
        $needs_titleize = $lang !== null || $try_to_keep_the_string_length;
        if ($needs_titleize) {
            return self::str_titleize(
                $str,
                null,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length,
                false
            );
        }

        if ($encoding === 'UTF-8') {
            return \mb_convert_case($str, \MB_CASE_TITLE);
        }

        $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

        return \mb_convert_case($str, \MB_CASE_TITLE, $normalized_encoding);
    }
12693
12694
    /**
     * alias for "UTF8::to_ascii()"
     *
     * All arguments are forwarded unchanged.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $subst_chr [optional] <p>Passed on as the "unknown" character of "UTF8::to_ascii()".</p>
     * @param bool   $strict    [optional] <p>Passed on as the "strict" flag of "UTF8::to_ascii()".</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::to_ascii()
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
     */
    public static function toAscii(
        string $str,
        string $subst_chr = '?',
        bool $strict = false
    ): string {
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
12715
12716
    /**
     * alias for "UTF8::to_iso8859()"
     *
     * @param string|string[] $str <p>A string or an array of strings, forwarded unchanged.</p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *
     * @see        UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function toIso8859($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
12732
12733
    /**
     * alias for "UTF8::to_iso8859()"
     *
     * @param string|string[] $str <p>A string or an array of strings, forwarded unchanged.</p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *
     * @see        UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function toLatin1($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
12749
12750
    /**
     * alias for "UTF8::to_utf8()"
     *
     * Only the input is forwarded, so html-entities are not decoded
     * (the second argument of "UTF8::to_utf8()" keeps its default).
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *
     * @see        UTF8::to_utf8()
     * @deprecated <p>please use "UTF8::to_utf8()"</p>
     */
    public static function toUTF8($str)
    {
        $converted = self::to_utf8($str);

        return $converted;
    }
12766
12767
    /**
     * Convert a string into ASCII.
     *
     * The conversion itself is delegated to "ASCII::to_transliterate()".
     *
     * EXAMPLE: <code>UTF8::to_ascii('déjà σσς iıii'); // 'deja sss iiii'</code>
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function to_ascii(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        $transliterated = ASCII::to_transliterate($str, $unknown, $strict);

        return $transliterated;
    }
12788
12789
    /**
     * Interpret a value as boolean.
     *
     * The value is cast to a string first. An empty string is false, the well-known
     * words ("true", "1", "on", "yes", "false", "0", "off", "no") are looked up as they
     * are and then in lower-case, numeric strings are true if they are greater than zero
     * and everything else is true unless it only consists of characters removed by "trim()".
     *
     * @param bool|int|string $str
     *
     * @psalm-param bool|int|numeric-string $str
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        // init
        $value = (string) $str;

        if ($value === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $known_values = [
            'true'  => true,
            '1'     => true,
            'on'    => true,
            'yes'   => true,
            'false' => false,
            '0'     => false,
            'off'   => false,
            'no'    => false,
        ];

        // exact match first, lower-cased match second (the mapped values are never null)
        $known = $known_values[$value] ?? $known_values[\strtolower($value)] ?? null;
        if ($known !== null) {
            return $known;
        }

        if (!\is_numeric($value)) {
            return (bool) \trim($value);
        }

        return ((float) $value + 0) > 0;
    }
12834
12835
    /**
     * Convert given string to safe filename (and keep string case).
     *
     * The conversion itself is delegated to "ASCII::to_filename()".
     *
     * @param string $str
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
     *                                  simply replaced with hyphen.
     * @param string $fallback_char
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function to_filename(
        string $str,
        bool $use_transliterate = false,
        string $fallback_char = '-'
    ): string {
        $filename = ASCII::to_filename($str, $use_transliterate, $fallback_char);

        return $filename;
    }
12858
12859
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * Arrays are converted element by element (recursively), the keys are kept.
     *
     * EXAMPLE: <code>UTF8::to_utf8(UTF8::to_iso8859('  -ABC-中文空白-  ')); // '  -ABC-????-  '</code>
     *
     * @param string|string[] $str
     *
     * @psalm-pure
     *
     * @return string|string[]
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str)) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        $str = (string) $str;

        // nothing to decode in an empty string
        return $str === '' ? '' : self::utf8_decode($str);
    }
12887
12888
    /**
     * alias for "UTF8::to_iso8859()"
     *
     * @param string|string[] $str <p>A string or an array of strings, forwarded unchanged.</p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *
     * @see        UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function to_latin1($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
12904
12905
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * Every string (a single one, or each element of an array) is converted
     * by "UTF8::to_utf8_string()"; array keys are kept.
     *
     * <ul>
     * <li>It decode UTF-8 codepoints and Unicode escape sequences.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * EXAMPLE: <code>UTF8::to_utf8(["\u0063\u0061\u0074"]); // array('cat')</code>
     *
     * @param string|string[] $str                        <p>Any string or array of strings.</p>
     * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>The UTF-8 encoded string</p>
     *
     * @template TToUtf8
     * @psalm-param TToUtf8 $str
     * @psalm-return TToUtf8
     *
     * @noinspection SuspiciousBinaryOperationInspection
     */
    public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
    {
        if (!\is_array($str)) {
            /** @psalm-var TToUtf8 $converted */
            $converted = self::to_utf8_string($str, $decode_html_entity_to_utf8);

            return $converted;
        }

        foreach ($str as $key => $value) {
            $str[$key] = self::to_utf8_string($value, $decode_html_entity_to_utf8);
        }

        return $str;
    }
12946
12947
    /**
     * Converts a single string to UTF-8, while byte sequences that already look like UTF-8 are kept as they are.
     *
     * <ul>
     * <li>Unicode escape sequences ("\uXXXX", including surrogate pairs) are decoded.</li>
     * <li>Every other non-ASCII byte is passed through "to_utf8_convert_helper()"; it assumes that the encoding of
     * the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you may need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * EXAMPLE: <code>UTF8::to_utf8_string("\u0063\u0061\u0074"); // 'cat'</code>
     *
     * @param string $str                        <p>Any string.</p>
     * @param bool   $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The UTF-8 encoded string, or an empty string if the escape-sequence regex fails.</p>
     *
     * @noinspection SuspiciousBinaryOperationInspection
     */
    public static function to_utf8_string(string $str, bool $decode_html_entity_to_utf8 = false): string
    {
        if ($str === '') {
            return $str;
        }

        $length = \strlen($str);
        $result = '';
        $pos = 0;

        while ($pos < $length) {
            $lead = $str[$pos];

            if ($lead < "\xC0") {
                // 0x80-0xBF is a stray continuation byte and needs conversion, lower bytes are kept as they are
                $result .= ($lead & "\xC0") === "\x80" ? self::to_utf8_convert_helper($lead) : $lead;
                ++$pos;

                continue;
            }

            // how many continuation bytes must follow, if this byte really starts a UTF-8 sequence?
            if ($lead <= "\xDF") {
                $trail_bytes = 1;
            } elseif ($lead <= "\xEF") {
                $trail_bytes = 2;
            } elseif ($lead <= "\xF7") {
                $trail_bytes = 3;
            } else {
                // 0xF8-0xFF is not treated as the start of a UTF-8 sequence
                $trail_bytes = 0;
            }

            // a sequence that is cut off by the end of the string does not count as UTF-8
            $looks_like_utf8 = $trail_bytes > 0 && $pos + $trail_bytes < $length;
            for ($offset = 1; $looks_like_utf8 && $offset <= $trail_bytes; ++$offset) {
                $next = $str[$pos + $offset];
                $looks_like_utf8 = $next >= "\x80" && $next <= "\xBF";
            }

            if ($looks_like_utf8) {
                // almost sure it's UTF-8 already: copy the whole sequence
                $result .= \substr($str, $pos, $trail_bytes + 1);
                $pos += $trail_bytes + 1;
            } else {
                // not valid UTF-8: convert only the lead byte, the following bytes are checked on their own
                $result .= self::to_utf8_convert_helper($lead);
                ++$pos;
            }
        }

        // decode unicode escape sequences + unicode surrogate pairs
        $result = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // single "\uXXXX" escape
                    $code_point = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair, see http://unicode.org/faq/utf_bom.html#utf16-4
                    $high = (int) \hexdec($matches[1]);
                    $low = (int) \hexdec($matches[2]);
                    $code_point = 0x10000 + (($high - 0xD800) << 10) + ($low - 0xDC00);
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($code_point < 0x80) {
                    return (string) self::chr($code_point);
                }

                if ($code_point < 0xA0) {
                    /** @noinspection UnnecessaryCastingInspection */
                    return (string) self::chr(0xC0 | ($code_point >> 6)) . (string) self::chr(0x80 | ($code_point & 0x3F));
                }

                return self::decimal_to_chr($code_point);
            },
            $result
        );

        // "preg_replace_callback()" returns null on error
        if ($result === null) {
            return '';
        }

        return $decode_html_entity_to_utf8 ? self::html_entity_decode($result) : $result;
    }
13080
13081
    /**
     * Casts a numeric string to an integer.
     *
     * The check is done with "is_numeric()" and the conversion is a plain "(int)" cast, so e.g. "1.9" results in 1.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return int|null
     *                  <p>null if the string isn't numeric</p>
     */
    public static function to_int(string $str)
    {
        return \is_numeric($str) ? (int) $str : null;
    }
13099
13100
    /**
     * Returns the given input as string, or null if the input isn't int|float|string
     * and does not implement the "__toString()" method.
     *
     * Everything else (null, bool, array, resource, objects without "__toString()") results in null.
     *
     * @param float|int|object|string|null $input
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>null if the input isn't int|float|string and has no "__toString()" method</p>
     */
    public static function to_string($input)
    {
        // scalars that have an unambiguous string representation (booleans are excluded on purpose)
        if (\is_string($input) || \is_int($input) || \is_float($input)) {
            return (string) $input;
        }

        // objects are only converted if they know how to represent themselves as a string
        if (\is_object($input) && \method_exists($input, '__toString')) {
            return (string) $input;
        }

        return null;
    }
13145
13146
    /**
     * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
     *
     * INFO: This is slower than "trim()"
     *
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
     * but the check for ASCII (7-Bit) costs more time than we can save here.
     *
     * EXAMPLE: <code>UTF8::trim('   -ABC-中文空白-  '); // '-ABC-中文空白-'</code>
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars [optional] <p>Optional characters to be stripped, null strips whitespace and
     *                           an empty string strips nothing.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The trimmed string.</p>
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty char list can't strip anything (same as the native "trim()"),
        // and it would end up as the broken character class "[]" in the patterns below.
        if ($chars === '') {
            return $str;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                // "mb_ereg_replace()" takes the pattern without delimiters, so there is no delimiter to quote
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                $pattern = "^[{$chars}]+|[{$chars}]+\$";
            } else {
                $pattern = '^[\\s]+|[\\s]+$';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // fallback without mbstring: "regex_replace()" gets the same pattern, quoted for the "/" delimiter
        if ($chars !== null) {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '');
    }
13192
13193
    /**
     * Makes string's first char uppercase.
     *
     * EXAMPLE: <code>UTF8::ucfirst('ñtërnâtiônàlizætiøn foo'); // 'Ñtërnâtiônàlizætiøn foo'</code>
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The resulting string with the first char in uppercase.</p>
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove non UTF-8 chars, before the string is split into "first char" + "rest"
            $str = self::clean($str);
        }

        // the plain "mb_strtoupper()" is only used, if no language / length handling was requested
        $plain_mb_upper = $lang === null && !$try_to_keep_the_string_length;

        if ($encoding === 'UTF-8') {
            $first_char = (string) \mb_substr($str, 0, 1);
            $rest = (string) \mb_substr($str, 1);

            $first_char_upper = $plain_mb_upper
                ? \mb_strtoupper($first_char)
                : self::strtoupper($first_char, $encoding, false, $lang, $try_to_keep_the_string_length);

            return $first_char_upper . $rest;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $rest = (string) self::substr($str, 1, null, $encoding);

        if ($plain_mb_upper) {
            $first_char_upper = \mb_strtoupper(
                (string) \mb_substr($str, 0, 1, $encoding),
                $encoding
            );
        } else {
            $first_char_upper = self::strtoupper(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );
        }

        return $first_char_upper . $rest;
    }
13269
13270
    /**
     * Alias of "UTF8::ucfirst()": only the first char of the string is uppercased.
     *
     * The optional "$lang" and "$try_to_keep_the_string_length" arguments of "UTF8::ucfirst()"
     * are not available here, their defaults are used.
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::ucfirst()
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
     */
    public static function ucword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        return self::ucfirst($str, $encoding, $clean_utf8);
    }
13291
13292
    /**
     * Uppercase for all words in the string.
     *
     * EXAMPLE: <code>UTF8::ucwords('iñt ërn âTi ônà liz æti øn'); // 'Iñt Ërn ÂTi Ônà Liz Æti Øn'</code>
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Exclusion for some words, these are kept as they are.</p>
     * @param string   $char_list  [optional] <p>Additional chars that belong to a word and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // strict comparison: the string "0" is falsy in PHP, but it's a valid (non-empty) input
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // The native function is only an option without a char-list and without (non-empty) exceptions;
        // compared against '' so that a "0" in the char-list / exceptions isn't ignored.
        $use_php_default_functions = ($char_list . \implode('', $exceptions)) === '';

        if (
            $use_php_default_functions
            &&
            ASCII::is_ascii($str)
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        foreach ($words as $word) {
            // skip empty parts only, a word like "0" must stay in the result
            if ($word === '') {
                continue;
            }

            if (
                !$use_exceptions
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words_str .= self::ucfirst($word, $encoding);
            } else {
                $words_str .= $word;
            }
        }

        return $words_str;
    }
13360
13361
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * EXAMPLE: <code>UTF8::urldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible, until the string doesn't change anymore.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // without any of these markers there is nothing to decode, only the simple UTF-8 fix is applied
        $has_encoded_parts = false;
        foreach (['&', '%', '+', '\u'] as $marker) {
            if (\strpos($str, $marker) !== false) {
                $has_encoded_parts = true;

                break;
            }
        }

        if (!$has_encoded_parts) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // one decode pass is always done, further passes only in "multi decode" mode and while something changes
        do {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::fix_simple_utf8(
                \urldecode(
                    self::html_entity_decode(
                        self::to_utf8($str),
                        \ENT_QUOTES | \ENT_HTML5
                    )
                )
            );
        } while ($multi_decode && $previous !== $str);

        return $str;
    }
13436
13437
    /**
     * Returns an array that maps "urlencoded" WINDOWS-1252 sequences ("%20" up to "%FF") to UTF-8 characters.
     *
     * @psalm-pure
     *
     * @return string[]
     *
     * @deprecated <p>please use the "UTF8::urldecode()" function to decode a string</p>
     */
13446
    public static function urldecode_fix_win1252_chars(): array
13447
    {
13448
        return [
13449 2
            '%20' => ' ',
13450
            '%21' => '!',
13451
            '%22' => '"',
13452
            '%23' => '#',
13453
            '%24' => '$',
13454
            '%25' => '%',
13455
            '%26' => '&',
13456
            '%27' => "'",
13457
            '%28' => '(',
13458
            '%29' => ')',
13459
            '%2A' => '*',
13460
            '%2B' => '+',
13461
            '%2C' => ',',
13462
            '%2D' => '-',
13463
            '%2E' => '.',
13464
            '%2F' => '/',
13465
            '%30' => '0',
13466
            '%31' => '1',
13467
            '%32' => '2',
13468
            '%33' => '3',
13469
            '%34' => '4',
13470
            '%35' => '5',
13471
            '%36' => '6',
13472
            '%37' => '7',
13473
            '%38' => '8',
13474
            '%39' => '9',
13475
            '%3A' => ':',
13476
            '%3B' => ';',
13477
            '%3C' => '<',
13478
            '%3D' => '=',
13479
            '%3E' => '>',
13480
            '%3F' => '?',
13481
            '%40' => '@',
13482
            '%41' => 'A',
13483
            '%42' => 'B',
13484
            '%43' => 'C',
13485
            '%44' => 'D',
13486
            '%45' => 'E',
13487
            '%46' => 'F',
13488
            '%47' => 'G',
13489
            '%48' => 'H',
13490
            '%49' => 'I',
13491
            '%4A' => 'J',
13492
            '%4B' => 'K',
13493
            '%4C' => 'L',
13494
            '%4D' => 'M',
13495
            '%4E' => 'N',
13496
            '%4F' => 'O',
13497
            '%50' => 'P',
13498
            '%51' => 'Q',
13499
            '%52' => 'R',
13500
            '%53' => 'S',
13501
            '%54' => 'T',
13502
            '%55' => 'U',
13503
            '%56' => 'V',
13504
            '%57' => 'W',
13505
            '%58' => 'X',
13506
            '%59' => 'Y',
13507
            '%5A' => 'Z',
13508
            '%5B' => '[',
13509
            '%5C' => '\\',
13510
            '%5D' => ']',
13511
            '%5E' => '^',
13512
            '%5F' => '_',
13513
            '%60' => '`',
13514
            '%61' => 'a',
13515
            '%62' => 'b',
13516
            '%63' => 'c',
13517
            '%64' => 'd',
13518
            '%65' => 'e',
13519
            '%66' => 'f',
13520
            '%67' => 'g',
13521
            '%68' => 'h',
13522
            '%69' => 'i',
13523
            '%6A' => 'j',
13524
            '%6B' => 'k',
13525
            '%6C' => 'l',
13526
            '%6D' => 'm',
13527
            '%6E' => 'n',
13528
            '%6F' => 'o',
13529
            '%70' => 'p',
13530
            '%71' => 'q',
13531
            '%72' => 'r',
13532
            '%73' => 's',
13533
            '%74' => 't',
13534
            '%75' => 'u',
13535
            '%76' => 'v',
13536
            '%77' => 'w',
13537
            '%78' => 'x',
13538
            '%79' => 'y',
13539
            '%7A' => 'z',
13540
            '%7B' => '{',
13541
            '%7C' => '|',
13542
            '%7D' => '}',
13543
            '%7E' => '~',
13544
            '%7F' => '',
13545
            '%80' => '`',
13546
            '%81' => '',
13547
            '%82' => '‚',
13548
            '%83' => 'ƒ',
13549
            '%84' => '„',
13550
            '%85' => '…',
13551
            '%86' => '†',
13552
            '%87' => '‡',
13553
            '%88' => 'ˆ',
13554
            '%89' => '‰',
13555
            '%8A' => 'Š',
13556
            '%8B' => '‹',
13557
            '%8C' => 'Œ',
13558
            '%8D' => '',
13559
            '%8E' => 'Ž',
13560
            '%8F' => '',
13561
            '%90' => '',
13562
            '%91' => '‘',
13563
            '%92' => '’',
13564
            '%93' => '“',
13565
            '%94' => '”',
13566
            '%95' => '•',
13567
            '%96' => '–',
13568
            '%97' => '—',
13569
            '%98' => '˜',
13570
            '%99' => '™',
13571
            '%9A' => 'š',
13572
            '%9B' => '›',
13573
            '%9C' => 'œ',
13574
            '%9D' => '',
13575
            '%9E' => 'ž',
13576
            '%9F' => 'Ÿ',
13577
            '%A0' => '',
13578
            '%A1' => '¡',
13579
            '%A2' => '¢',
13580
            '%A3' => '£',
13581
            '%A4' => '¤',
13582
            '%A5' => '¥',
13583
            '%A6' => '¦',
13584
            '%A7' => '§',
13585
            '%A8' => '¨',
13586
            '%A9' => '©',
13587
            '%AA' => 'ª',
13588
            '%AB' => '«',
13589
            '%AC' => '¬',
13590
            '%AD' => '',
13591
            '%AE' => '®',
13592
            '%AF' => '¯',
13593
            '%B0' => '°',
13594
            '%B1' => '±',
13595
            '%B2' => '²',
13596
            '%B3' => '³',
13597
            '%B4' => '´',
13598
            '%B5' => 'µ',
13599
            '%B6' => '¶',
13600
            '%B7' => '·',
13601
            '%B8' => '¸',
13602
            '%B9' => '¹',
13603
            '%BA' => 'º',
13604
            '%BB' => '»',
13605
            '%BC' => '¼',
13606
            '%BD' => '½',
13607
            '%BE' => '¾',
13608
            '%BF' => '¿',
13609
            '%C0' => 'À',
13610
            '%C1' => 'Á',
13611
            '%C2' => 'Â',
13612
            '%C3' => 'Ã',
13613
            '%C4' => 'Ä',
13614
            '%C5' => 'Å',
13615
            '%C6' => 'Æ',
13616
            '%C7' => 'Ç',
13617
            '%C8' => 'È',
13618
            '%C9' => 'É',
13619
            '%CA' => 'Ê',
13620
            '%CB' => 'Ë',
13621
            '%CC' => 'Ì',
13622
            '%CD' => 'Í',
13623
            '%CE' => 'Î',
13624
            '%CF' => 'Ï',
13625
            '%D0' => 'Ð',
13626
            '%D1' => 'Ñ',
13627
            '%D2' => 'Ò',
13628
            '%D3' => 'Ó',
13629
            '%D4' => 'Ô',
13630
            '%D5' => 'Õ',
13631
            '%D6' => 'Ö',
13632
            '%D7' => '×',
13633
            '%D8' => 'Ø',
13634
            '%D9' => 'Ù',
13635
            '%DA' => 'Ú',
13636
            '%DB' => 'Û',
13637
            '%DC' => 'Ü',
13638
            '%DD' => 'Ý',
13639
            '%DE' => 'Þ',
13640
            '%DF' => 'ß',
13641
            '%E0' => 'à',
13642
            '%E1' => 'á',
13643
            '%E2' => 'â',
13644
            '%E3' => 'ã',
13645
            '%E4' => 'ä',
13646
            '%E5' => 'å',
13647
            '%E6' => 'æ',
13648
            '%E7' => 'ç',
13649
            '%E8' => 'è',
13650
            '%E9' => 'é',
13651
            '%EA' => 'ê',
13652
            '%EB' => 'ë',
13653
            '%EC' => 'ì',
13654
            '%ED' => 'í',
13655
            '%EE' => 'î',
13656
            '%EF' => 'ï',
13657
            '%F0' => 'ð',
13658
            '%F1' => 'ñ',
13659
            '%F2' => 'ò',
13660
            '%F3' => 'ó',
13661
            '%F4' => 'ô',
13662
            '%F5' => 'õ',
13663
            '%F6' => 'ö',
13664
            '%F7' => '÷',
13665
            '%F8' => 'ø',
13666
            '%F9' => 'ù',
13667
            '%FA' => 'ú',
13668
            '%FB' => 'û',
13669
            '%FC' => 'ü',
13670
            '%FD' => 'ý',
13671
            '%FE' => 'þ',
13672
            '%FF' => 'ÿ',
13673
        ];
13674
    }
13675
13676
    /**
     * Decodes a UTF-8 string to ISO-8859-1.
     *
     * Characters that need more than one byte in ISO-8859-1 (3- and 4-byte UTF-8 sequences, or 2-byte sequences
     * above code point 255) are replaced by "?". The input is expected to be well-formed UTF-8, continuation
     * bytes are not validated.
     *
     * EXAMPLE: <code>UTF8::encode('UTF-8', UTF8::utf8_decode('-ABC-中文空白-')); // '-ABC-????-'</code>
     *
     * @param string $str             <p>The input string.</p>
     * @param bool   $keep_utf8_chars <p>If true, the original string is returned whenever the decoded result
     *                                isn't shorter than the input (compared via "UTF8::strlen()").</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection SuspiciousBinaryOperationInspection
     */
    public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // save for later comparison
        $str_backup = $str;
        $len = \strlen($str);

        // lazy-load the lookup tables
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $no_char_found = '?';

        // The string is rewritten in place: $i is the read position, $j the write position ($j <= $i).
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    // 2-byte sequence
                    if ($i + 1 >= $len) {
                        // the input ends in the middle of the sequence: don't read behind the end of the string
                        $str[$j] = $no_char_found;

                        break;
                    }

                    $lead = $str[$i];
                    $trail = $str[++$i];
                    $c = (self::$ORD[$lead & "\x1F"] << 6) | self::$ORD[$trail & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $no_char_found;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // 4-byte sequence: skip one byte more than the 3-byte case below
                    ++$i;

                // no break

                case "\xE0":
                    // 3-byte (or 4-byte) sequence: never fits into a single byte
                    $str[$j] = $no_char_found;
                    $i += 2;

                    break;

                default:
                    // single byte: copy as it is
                    $str[$j] = $str[$i];
            }
        }

        // cut off the leftover bytes behind the write position
        /** @var false|string $return - needed for PhpStan (stubs error) */
        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keep_utf8_chars
            &&
            (int) self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
13752
13753
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * EXAMPLE: <code>UTF8::utf8_decode(UTF8::utf8_encode('-ABC-中文空白-')); // '-ABC-中文空白-'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The UTF-8 encoded string, or an empty string if the encoding failed.</p>
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // The string must be encoded exactly once, a second pass would double-encode every non-ASCII byte.
        // NOTE(review): the native "utf8_encode()" is deprecated in newer PHP versions - confirm the supported range.
        /** @var false|string $encoded - the polyfill maybe return false */
        $encoded = \utf8_encode($str);

        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        if ($encoded === false) {
            return '';
        }

        return $encoded;
    }
13781
13782
    /**
     * Alias of "UTF8::fix_simple_utf8()": the call is forwarded unchanged.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Whatever "UTF8::fix_simple_utf8()" returns for the given string.</p>
     *
     * @deprecated <p>please use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        return self::fix_simple_utf8($str);
    }
13797
13798
    /**
     * Returns the class-level table of UTF-8 whitespace characters.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @psalm-pure
     *
     * @return string[]
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
     *                  as defined in above URL
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
13813
13814
    /**
     * Limit the number of words in a string.
     *
     * EXAMPLE: <code>UTF8::words_limit('fòô bàř fòô', 2, ''); // 'fòô bàř'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $limit      <p>The maximum number of words to keep; values below 1 yield an empty string.</p>
     * @param string $str_add_on <p>Suffix appended when the string was actually shortened.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The untouched input if it already fits (or the pattern does not match),
     *                otherwise the leading words, right-trimmed, followed by $str_add_on.</p>
     */
    public static function words_limit(
        string $str,
        int $limit = 100,
        string $str_add_on = '…'
    ): string {
        if ($limit < 1 || $str === '') {
            return '';
        }

        // Capture optional leading whitespace plus up to $limit "word + trailing whitespace" groups.
        \preg_match('/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u', $str, $found);

        // No match at all: hand the input back unchanged.
        if (!isset($found[0])) {
            return $str;
        }

        $head = $found[0];

        // The match spans the whole string, so nothing has to be cut off.
        if (\mb_strlen($str) === (int) \mb_strlen($head)) {
            return $str;
        }

        return \rtrim($head) . $str_add_on;
    }
13848
13849
    /**
     * Wraps a string to a given number of characters
     *
     * EXAMPLE: <code>UTF8::wordwrap('Iñtërnâtiônàlizætiøn', 2, '<br>', true); // 'Iñ<br>të<br>rn<br>ât<br>iô<br>nà<br>li<br>zæ<br>ti<br>øn'</code>
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The given string wrapped at the specified column;
     *                an empty string if $str or $break is empty.</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($break === '' || $str === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }

        // Build two parallel views of the input:
        //  - $chars: the real characters (an existing $break is stored as ONE element)
        //  - $mask:  a one-byte-per-element stand-in ('#' = break, ' ' = space, '?' = anything else)
        // so that the byte-based native wordwrap() can decide where to wrap.
        /** @var string[] $chars */
        $chars = [];
        $mask = '';
        foreach ($segments as $segment_index => $segment) {
            if ($segment_index) {
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $mask = \wordwrap($mask, $width, '#', $cut);
        $mask_length = \mb_strlen($mask);

        $wrapped = '';
        $char_index = 0;
        $mask_index = -1;
        $break_pos = -1;

        // Walk over every '#' in the wrapped mask and replay the decision on the real characters.
        while (($break_pos = \mb_strpos($mask, '#', $break_pos + 1)) !== false) {
            for (++$mask_index; $mask_index < $break_pos; ++$mask_index) {
                if (isset($chars[$char_index])) {
                    $wrapped .= $chars[$char_index];
                    unset($chars[$char_index]);
                }
                ++$char_index;

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($mask_index > $mask_length) {
                    break 2;
                }
            }

            // A '#' that stands for an original break or a replaced space consumes that element;
            // otherwise the '#' was inserted and no original character is dropped.
            if (
                $chars[$char_index] === $break
                ||
                $chars[$char_index] === ' '
            ) {
                unset($chars[$char_index++]);
            }

            $wrapped .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($break_pos > $mask_length) {
                break;
            }
        }

        // Whatever was not consumed above is the tail after the last break.
        return $wrapped . \implode('', $chars);
    }
13943
13944
    /**
     * Line-wraps the string after $width, but splits the string by "$delimiter" first,
     * so that every resulting part is wrapped on its own.
     *
     * @param string      $str             <p>The input string.</p>
     * @param int         $width           [optional] <p>The column width.</p>
     * @param string      $break           [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool        $cut             [optional] <p>
     *                                     If the cut is set to true, the string is
     *                                     always wrapped at or before the specified width. So if you have
     *                                     a word that is larger than the given width, it is broken apart.
     *                                     </p>
     * @param bool        $add_final_break [optional] <p>
     *                                     If this flag is true, then the method will add a $break at the end
     *                                     of the result string.
     *                                     </p>
     * @param string|null $delimiter       [optional] <p>
     *                                     You can change the default behavior, where we split the string by newline.
     *                                     </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The wrapped parts joined by $delimiter (or "\n" if it is null),
     *                optionally followed by $break.</p>
     */
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $add_final_break = true,
        string $delimiter = null
    ): string {
        // Without an explicit delimiter, split on any newline flavour (\r\n, \r or \n).
        $parts = $delimiter === null
            ? \preg_split('/\\r\\n|\\r|\\n/', $str)
            : \explode($delimiter, $str);

        $wrapped_parts = [];
        if ($parts !== false) {
            foreach ($parts as $part) {
                $wrapped_parts[] = self::wordwrap($part, $width, $break, $cut);
            }
        }

        $glue = $delimiter ?? "\n";
        $suffix = $add_final_break ? $break : '';

        return \implode($glue, $wrapped_parts) . $suffix;
    }
13997
13998
    /**
     * Returns the class-level list of Unicode White Space characters.
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array with numeric code point as key and White Space Character as value.</p>
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
14010
14011
    /**
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8_string('Iñtërnâtiônàlizætiøn'); // true
     * //
     * UTF8::is_utf8_string("Iñtërnâtiônàlizætiøn\xA0\xA1"); // false
     * </code>
     *
     * @see          http://hsivonen.iki.fi/php-utf8/
     *
     * @param string $str    <p>The string to be checked.</p>
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function is_utf8_string(string $str, bool $strict = false)
    {
        if ($str === '') {
            return true;
        }

        // Strict mode: input that looks binary and is detected as UTF-16 / UTF-32 is rejected.
        if ($strict && self::is_binary($str, true)) {
            if (self::is_utf16($str, false) !== false) {
                return false;
            }

            if (self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        if (self::$SUPPORT['pcre_utf8']) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^./us', $str) === 1;
        }

        // Fallback: validate the string octet by octet.
        $pending = 0;         // number of continuation octets still expected
        $code_point = 0;      // code point assembled so far
        $sequence_length = 1; // total number of octets of the current sequence

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $byte_count = \strlen($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($offset = 0; $offset < $byte_count; ++$offset) {
            $byte = self::$ORD[$str[$offset]];

            if ($pending === 0) {
                // Start of a character: either US-ASCII or the first octet of a multi-octet sequence.
                if ((0x80 & $byte) === 0) {
                    // US-ASCII, pass straight through.
                    $sequence_length = 1;
                } elseif ((0xE0 & $byte) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $code_point = ($byte & 0x1F) << 6;
                    $pending = 1;
                    $sequence_length = 2;
                } elseif ((0xF0 & $byte) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $code_point = ($byte & 0x0F) << 12;
                    $pending = 2;
                    $sequence_length = 3;
                } elseif ((0xF8 & $byte) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $code_point = ($byte & 0x07) << 18;
                    $pending = 3;
                    $sequence_length = 4;
                } elseif ((0xFC & $byte) === 0xF8) {
                    // First octet of 5 octet sequence: always illegal, but we keep reading
                    // until the sequence ends and let the checks below reject it.
                    $code_point = ($byte & 0x03) << 24;
                    $pending = 4;
                    $sequence_length = 5;
                } elseif ((0xFE & $byte) === 0xFC) {
                    // First octet of 6 octet sequence, handled like the 5 octet case.
                    $code_point = ($byte & 1) << 30;
                    $pending = 5;
                    $sequence_length = 6;
                } else {
                    // Neither US-ASCII nor a legal first octet of a multi-octet sequence.
                    return false;
                }

                continue;
            }

            // Inside a sequence only continuation octets (10xxxxxx) are allowed.
            if ((0xC0 & $byte) !== 0x80) {
                return false;
            }

            // Merge the 6 payload bits into their position within the code point.
            $code_point |= ($byte & 0x0000003F) << (($pending - 1) * 6);

            if (--$pending !== 0) {
                continue;
            }

            // The sequence is complete: reject illegal sequences and code points.
            if (
                // Sequences longer than 4 octets are never valid.
                $sequence_length > 4
                ||
                // From Unicode 3.1, non-shortest form is illegal
                ($sequence_length === 2 && $code_point < 0x0080)
                ||
                ($sequence_length === 3 && $code_point < 0x0800)
                ||
                ($sequence_length === 4 && $code_point < 0x10000)
                ||
                // From Unicode 3.2, surrogate characters are illegal.
                (($code_point & 0xFFFFF800) === 0xD800)
                ||
                // Code points outside the Unicode range are illegal.
                $code_point > 0x10FFFF
            ) {
                return false;
            }

            // Reset for the next character ($pending is already 0 here).
            $code_point = 0;
            $sequence_length = 1;
        }

        // A sequence that is still open at the end of the input is incomplete.
        return $pending === 0;
    }
14166
14167
    /**
     * Replaces case-folding pairs in the string: first the "common" table,
     * then (optionally) the "full" table loaded from the data directory.
     *
     * @param string $str
     * @param bool   $use_lowercase      <p>Use uppercase by default, otherwise use lowercase.</p>
     * @param bool   $use_full_case_fold <p>Convert not only common cases.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function fixStrCaseHelper(
        string $str,
        bool $use_lowercase = false,
        bool $use_full_case_fold = false
    ) {
        $upper = self::$COMMON_CASE_FOLD['upper'];
        $lower = self::$COMMON_CASE_FOLD['lower'];

        // Direction of the replacement: towards lowercase or (default) towards uppercase.
        $search = $use_lowercase ? $upper : $lower;
        $replace = $use_lowercase ? $lower : $upper;
        $str = \str_replace($search, $replace, $str);

        if (!$use_full_case_fold) {
            return $str;
        }

        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            // Loaded once per process and kept in the static variable afterwards.
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        // Index 0 is the search side when lowercasing, index 1 when uppercasing.
        $from = $use_lowercase ? 0 : 1;
        $to = 1 - $from;

        return \str_replace($FULL_CASE_FOLD[$from], $FULL_CASE_FOLD[$to], $str);
    }
14220
14221
    /**
     * Includes "<this directory>/data/<$file>.php" and returns whatever that file returns.
     *
     * @param string $file <p>File name without directory and without the ".php" extension.</p>
     *
     * @psalm-pure
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function getData(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
14239
14240
    /**
     * Lazily builds the emoji lookup caches (keys, values and reversible placeholder keys).
     *
     * @psalm-pure
     *
     * @return true|null
     *                   <p>true if the caches were built by this call, null if they already existed.</p>
     */
    private static function initEmojiData()
    {
        // Already initialised: nothing to do.
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        // Order the table so that keys with more bytes come first.
        /**
         * @psalm-suppress ImpureFunctionCall - static sort function is used
         */
        \uksort(
            self::$EMOJI,
            static function (string $left, string $right): int {
                return \strlen($right) <=> \strlen($left);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = self::$EMOJI;

        // One placeholder per key, derived from the key's CRC32 checksum and its reversed digits.
        foreach (self::$EMOJI_KEYS_CACHE as $emoji_key) {
            $checksum = \crc32($emoji_key);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev((string) $checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
14275
14276
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>false if the MB_OVERLOAD_STRING constant is not defined, otherwise whether
     *              that bit is set in the "mbstring.func_overload" INI value.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function mbstring_overloaded()
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $overload_flags = (int) @\ini_get('mbstring.func_overload');

        return (bool) ($overload_flags & \MB_OVERLOAD_STRING);
    }
14297
14298
    /**
     * Filters a list of strings and returns the remaining values re-indexed from 0.
     *
     * @param array    $strings             <p>The strings to filter.</p>
     * @param bool     $remove_empty_values <p>Drop values that are empty after trim().</p>
     * @param int|null $remove_short_values <p>
     *                                      If not null, drop values whose character length is
     *                                      less than or equal to this number.
     *                                      </p>
     *
     * @psalm-pure
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function reduce_string_array(
        array $strings,
        bool $remove_empty_values,
        int $remove_short_values = null
    ) {
        // init
        $return = [];

        // Iterate by value: the elements are only read, so a reference is not needed
        // (and would be left dangling after the loop).
        foreach ($strings as $str) {
            if (
                $remove_short_values !== null
                &&
                \mb_strlen($str) <= $remove_short_values
            ) {
                continue;
            }

            if (
                $remove_empty_values
                &&
                \trim($str) === ''
            ) {
                continue;
            }

            $return[] = $str;
        }

        return $return;
    }
14339
14340
    /**
     * Builds a regex fragment that matches any of the characters of $s and memoizes
     * the result per ($s, $class) pair.
     *
     * Characters are collected into one bracket expression "[...]"; a character that
     * cannot go into the bracket expression becomes its own alternative, in which
     * case the result is a non-capturing group "(?:...|...)".
     *
     * @param string $s     <p>The characters to match.</p>
     * @param string $class <p>Initial content of the bracket expression.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function rxClass(string $s, string $class = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $RX_CLASS_CACHE = [];

        $cache_key = $s . '_' . $class;

        if (isset($RX_CLASS_CACHE[$cache_key])) {
            return $RX_CLASS_CACHE[$cache_key];
        }

        // Index 0 holds the bracket-expression content, further entries are alternatives.
        /** @var string[] $class_array */
        $class_array = [$class];

        // Iterate by value into a dedicated variable instead of overwriting the
        // parameter $s through a reference to a temporary array.
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // A dash is put first so that it cannot form a range.
                $class_array[0] = '-' . $class_array[0];
            } elseif (!isset($char[2])) {
                // At most two bytes long: escape for use inside a "/"-delimited pattern.
                $class_array[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                $class_array[0] .= $char;
            } else {
                $class_array[] = $char;
            }
        }

        if ($class_array[0]) {
            $class_array[0] = '[' . $class_array[0] . ']';
        }

        if (\count($class_array) === 1) {
            $return = $class_array[0];
        } else {
            $return = '(?:' . \implode('|', $class_array) . ')';
        }

        $RX_CLASS_CACHE[$cache_key] = $return;

        return $return;
    }
14398
14399
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * The input is split by $delimiter and every part is passed through ucfirst(), except:
     * - parts that are exactly one of the listed name particles (e.g. "von", "de"),
     * - parts that start with one of the listed prefixes (e.g. "mc", "d'"),
     * - and, only if the delimiter is "-", parts that start with one of the name particles.
     *
     * @param string $names     <p>The name(s) to process.</p>
     * @param string $delimiter <p>The separator used for splitting and re-joining.</p>
     * @param string $encoding  <p>Passed on to the prefix lookups.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function str_capitalize_name_helper(
        string $names,
        string $delimiter,
        string $encoding = 'UTF-8'
    ) {
        // init
        $name_helper_array = \explode($delimiter, $names);
        if ($name_helper_array === false) {
            return '';
        }

        $special_cases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // $name is taken by reference because matching parts are rewritten in place.
        foreach ($name_helper_array as &$name) {
            if (\in_array($name, $special_cases['names'], true)) {
                continue;
            }

            $continue = false;

            if ($delimiter === '-') {
                // The lookup tables are only read, so they are iterated by value.
                foreach ($special_cases['names'] as $beginning) {
                    if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                        $continue = true;

                        break;
                    }
                }
            }

            foreach ($special_cases['prefixes'] as $beginning) {
                if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                    $continue = true;

                    break;
                }
            }

            if ($continue) {
                continue;
            }

            $name = self::ucfirst($name);
        }
        // Break the reference so that later writes to $name cannot alter the last element.
        unset($name);

        return \implode($delimiter, $name_helper_array);
    }
14499
14500
    /**
     * Generic case-sensitive transformation for collation matching:
     * normalizes the string to NFD and then removes every run of
     * non-spacing marks (\p{Mn}).
     *
     * @param string $str <p>The input string</p>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>The result of preg_replace(), which may be null on a regex error.</p>
     */
    private static function strtonatfold(string $str)
    {
        /** @noinspection PhpUndefinedClassInspection */
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        return \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
14518
14519
    /**
     * Converts one input element into its UTF-8 representation.
     *
     * The element is first translated via the "ord" table; if the Windows-1252
     * table has an entry for that value, the entry is returned. Otherwise a
     * two-byte sequence is built: a lead byte (value / 64, OR-ed with 0xC0) and
     * a trail byte (lowest 6 bits of the input, OR-ed with 0x80).
     *
     * @param int|string $input
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     * @noinspection SuspiciousBinaryOperationInspection
     */
    private static function to_utf8_convert_helper($input)
    {
        // init
        $buf = '';

        // The lookup tables are loaded lazily and kept in static properties.
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // "/" yields a float for most values; cast explicitly instead of relying on the
            // implicit float-to-int truncation of array keys (deprecated since PHP 8.1).
            /** @noinspection OffsetOperationsInspection */
            $cc1 = self::$CHR[(int) ($ordC1 / 64)] | "\xC0";
            // Intentional byte-wise string operations (not logical && / ||).
            $cc2 = ((string) $input & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
14558
14559
    /**
     * Rewrites non-standard "%uXXXX" escapes (3 or 4 hex digits) into
     * hexadecimal HTML entities of the form "&#xXXXX;".
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The input unchanged if it contains no such escape.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function urldecode_unicode_helper(string $str)
    {
        $pattern = '/%u([0-9a-fA-F]{3,4})/';

        // Nothing to rewrite (or the match failed): return the input as it is.
        if (!\preg_match($pattern, $str)) {
            return $str;
        }

        return (string) \preg_replace($pattern, '&#x\\1;', $str);
    }
14577
}
14578