Passed
Push — master ( b2b049...4644d4 )
by Lars
04:17
created

UTF8   F

Complexity

Total Complexity 1695

Size/Duplication

Total Lines 12550
Duplicated Lines 0 %

Test Coverage

Coverage 79.84%

Importance

Changes 0
Metric Value
wmc 1695
eloc 4358
dl 0
loc 12550
ccs 3042
cts 3810
cp 0.7984
rs 0.8
c 0
b 0
f 0

293 Methods

Rating   Name   Duplication   Size   Complexity  
A chr_to_decimal() 0 30 6
A str_substr_after_first_separator() 0 28 6
A file_has_bom() 0 8 2
A max() 0 14 3
B str_camelize() 0 70 10
A add_bom_to_string() 0 7 2
A parse_str() 0 16 4
A filter_input() 0 13 2
A str_contains() 0 10 2
B str_to_lines() 0 29 8
A array_change_key_case() 0 20 5
A get_unique_string() 0 15 2
A is_bom() 0 10 3
A is_hexadecimal() 0 8 2
A strnatcasecmp() 0 5 1
A encode_mimeheader() 0 25 5
A count_chars() 0 11 1
D strlen() 0 96 19
A str_isubstr_last() 0 20 4
A ctype_loaded() 0 3 1
A str_replace_beginning() 0 21 6
A has_uppercase() 0 8 2
A remove_left() 0 21 4
B stripos() 0 59 11
A str_offset_exists() 0 10 2
D strrchr() 0 101 20
A str_iends_with() 0 7 3
A max_chr_width() 0 8 2
A isBinary() 0 3 1
A ltrim() 0 19 4
A emoji_decode() 0 16 2
D is_utf8() 0 144 31
A remove_html() 0 3 1
B str_longest_common_suffix() 0 51 10
A lcword() 0 8 1
A str_pad_both() 0 7 1
A str_index_last() 0 11 1
A str_substr_last() 0 33 6
A mbstring_loaded() 0 3 1
A str_limit() 0 26 6
D chr() 0 101 18
A html_escape() 0 6 1
A string() 0 10 1
C normalize_encoding() 0 134 14
C get_file_type() 0 89 14
A str_ensure_right() 0 13 4
A chr_to_int() 0 3 1
B str_titleize_for_humans() 0 152 5
C is_utf16() 0 65 16
A isHtml() 0 3 1
C filter() 0 54 13
A normalize_whitespace() 0 30 6
A str_starts_with() 0 3 1
A isBase64() 0 3 1
A str_humanize() 0 15 1
A is_html() 0 12 2
A decode_mimeheader() 0 15 5
A html_decode() 0 3 1
A strchr() 0 8 1
A strichr() 0 8 1
A isUtf32() 0 3 1
A str_index_first() 0 11 1
A strlen_in_byte() 0 12 3
A str_ireplace_ending() 0 21 6
A rtrim() 0 19 4
C str_longest_common_substring() 0 73 16
A regex_replace() 0 20 3
A chunk_split() 0 3 1
A str_iindex_first() 0 11 1
A str_isubstr_before_first_separator() 0 16 5
B strrev() 0 43 10
A replace_all() 0 11 2
A removeBOM() 0 3 1
A strstr_in_byte() 0 12 4
A emoji_encode() 0 16 2
A str_matches_pattern() 0 3 1
A is_alpha() 0 8 2
B str_titleize() 0 55 10
B get_random_string() 0 53 10
A str_replace_first() 0 17 2
A fix_utf8() 0 30 4
A str_pad_right() 0 7 1
A first_char() 0 11 4
C stristr() 0 68 15
A isUtf8() 0 3 1
A strncasecmp() 0 10 1
A css_stripe_media_queries() 0 6 1
A clean() 0 46 6
A is_serialized() 0 11 3
A str_upper_camelize() 0 8 1
A is_uppercase() 0 8 2
A strnatcmp() 0 3 2
D str_pad() 0 146 16
A str_ireplace() 0 18 3
A str_replace_ending() 0 21 6
A string_has_bom() 0 10 3
A str_contains_all() 0 23 6
A is_ascii() 0 7 2
A normalize_line_ending() 0 3 1
A str_isubstr_after_last_separator() 0 23 5
B range() 0 41 10
B strspn() 0 30 10
A strcasecmp() 0 5 1
A str_transliterate() 0 3 1
B rawurldecode() 0 44 9
A normalize_msword() 0 43 2
C str_detect_encoding() 0 111 13
A spaces_to_tabs() 0 11 3
A str_istarts_with() 0 7 3
A is_blank() 0 8 2
A str_replace() 0 14 1
D getCharDirection() 0 105 118
A htmlspecialchars() 0 11 3
A replace() 0 11 2
A filter_var_array() 0 9 2
A __construct() 0 2 1
A decimal_to_chr() 0 3 1
A strip_tags() 0 15 4
A pcre_utf8_support() 0 4 1
B between() 0 48 8
A str_isubstr_before_last_separator() 0 21 6
D str_truncate_safe() 0 78 18
A codepoints() 0 29 4
A lowerCaseFirst() 0 8 1
D str_split() 0 125 28
A str_ends_with_any() 0 13 4
A chr_map() 0 5 1
A strrpos_in_byte() 0 12 4
A cleanup() 0 25 2
F strrpos() 0 118 25
A remove_right() 0 22 4
A remove_html_breaks() 0 3 1
A showSupport() 0 8 2
A char_at() 0 7 2
A remove_invisible_characters() 0 19 3
A single_chr_html_encode() 0 15 4
A chars() 0 3 1
A str_replace_last() 0 19 2
A str_iindex_last() 0 11 1
A str_substr_before_last_separator() 0 31 6
B is_binary() 0 35 9
A intlChar_loaded() 0 3 1
B strtocasefold() 0 33 7
A lcfirst() 0 44 5
A finfo_loaded() 0 3 1
B str_truncate() 0 44 7
D strripos() 0 96 19
A strpos_in_byte() 0 12 4
A str_ends_with() 0 3 1
A fits_inside() 0 3 1
A is_binary_file() 0 16 3
A intl_loaded() 0 3 1
B str_longest_common_prefix() 0 48 8
A str_pad_left() 0 7 1
A str_substr_first() 0 33 6
A html_stripe_empty_tags() 0 6 1
A chr_size_list() 0 17 3
A remove_bom() 0 21 5
A str_repeat() 0 5 1
A strpbrk() 0 11 4
A str_iends_with_any() 0 13 4
A str_substr_after_last_separator() 0 28 6
A str_isubstr_after_first_separator() 0 23 5
F extract_text() 0 175 34
A json_loaded() 0 3 1
A isBom() 0 3 1
B str_snakeize() 0 55 6
A int_to_chr() 0 3 1
A is_lowercase() 0 8 2
A str_sort() 0 15 3
A str_underscored() 0 3 1
A str_offset_get() 0 14 4
A strip_whitespace() 0 7 2
A hasBom() 0 3 1
A str_capitalize_name() 0 8 1
B str_limit_after_word() 0 53 11
A iconv_loaded() 0 3 1
A lcwords() 0 31 6
A str_upper_first() 0 8 1
A isAscii() 0 3 1
A normalizeEncoding() 0 3 1
A filter_var() 0 9 2
A is_empty() 0 3 1
B html_encode() 0 42 7
A str_dasherize() 0 3 1
A isUtf16() 0 3 1
A str_ensure_left() 0 11 3
F encode() 0 139 37
C is_utf32() 0 65 16
C ord() 0 65 16
A is_alphanumeric() 0 8 2
A json_decode() 0 14 2
A fix_simple_utf8() 0 19 4
C strcspn() 0 51 12
A checkForSupport() 0 47 4
B is_json() 0 27 8
A int_to_hex() 0 7 2
B str_split_pattern() 0 49 11
D strstr() 0 92 18
A has_lowercase() 0 8 2
A json_encode() 0 10 2
A str_isubstr_first() 0 25 4
A is_base64() 0 16 5
A str_last_char() 0 13 4
A str_ireplace_beginning() 0 21 6
A hex_to_int() 0 14 3
A htmlentities() 0 23 3
A hex_to_chr() 0 3 1
A str_substr_before_first_separator() 0 32 6
A isJson() 0 3 1
A str_surround() 0 3 1
A strncmp() 0 19 4
A filter_input_array() 0 9 2
A str_insert() 0 28 4
A getSupportInfo() 0 13 3
A replace_diamond_question_mark() 0 35 5
A chr_to_hex() 0 11 3
B str_delimit() 0 33 8
A min() 0 14 3
A collapse_whitespace() 0 8 2
C html_entity_decode() 0 86 17
A str_starts_with_any() 0 17 5
B strrichr() 0 54 11
A split() 0 6 1
A str_istarts_with_any() 0 17 5
A str_contains_any() 0 23 6
A remove_duplicates() 0 14 4
B str_slice() 0 33 10
A access() 0 11 4
F strpos() 0 131 27
A str_shuffle() 0 35 6
A strcmp() 0 5 2
B file_get_contents() 0 58 11
A str_word_count() 0 23 5
A strripos_in_byte() 0 12 4
A str_to_binary() 0 5 1
A callback() 0 3 1
A binary_to_str() 0 12 3
A bom() 0 3 1
B str_to_words() 0 36 8
B urldecode_fix_win1252_chars() 0 227 1
A substr_in_byte() 0 18 6
A substr_left() 0 15 4
A to_filename() 0 24 2
C utf8_decode() 0 60 13
C wordwrap() 0 52 12
B ucfirst() 0 57 7
A toUTF8() 0 3 1
C substr_count_in_byte() 0 54 15
A titlecase() 0 24 5
A getData() 0 6 1
B strtolower() 0 54 10
B urldecode() 0 44 9
D substr_replace() 0 124 27
A ws() 0 3 1
A toLatin1() 0 3 1
B ucwords() 0 48 9
A to_boolean() 0 35 5
B strwidth() 0 40 8
A trim() 0 19 4
A substr_compare() 0 33 6
C substr_count() 0 62 16
A to_latin1() 0 3 1
B strtr() 0 33 8
B str_capitalize_name_helper() 0 80 10
A utf8_encode() 0 16 3
A substr_iright() 0 15 4
A to_iso8859() 0 16 4
A words_limit() 0 17 5
A substr_right() 0 31 6
A tabs_to_spaces() 0 11 3
F to_ascii() 0 149 27
A reduce_string_array() 0 26 6
A mbstring_overloaded() 0 11 2
A whitespace_table() 0 3 1
A substr_count_simple() 0 32 6
D to_utf8() 0 117 35
A ucword() 0 3 1
A getDataIfExists() 0 10 2
A toAscii() 0 3 1
A swapCase() 0 17 4
A substr_ileft() 0 15 4
A toIso8859() 0 3 1
A strtonatfold() 0 6 1
A fixStrCaseHelper() 0 33 5
F substr() 0 143 32
A wordwrap_per_line() 0 15 3
A utf8_fix_win1252_chars() 0 3 1
A to_utf8_convert_helper() 0 27 5
B strtoupper() 0 54 10
A initEmojiData() 0 26 4
A symfony_polyfill_used() 0 16 5
B rxClass() 0 39 8

How to fix   Complexity   

Complex Class

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
final class UTF8
8
{
9
    // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
10
    // This regular expression is a workaround for http://bugs.exim.org/1279
11
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
12
13
    /**
     * Byte order marks and their length in bytes: BOM => byte length.
     *
     * Each BOM is listed as its raw byte sequence and additionally in its
     * "WINDOWS-1252" form, i.e. the way the BOM looks like after its bytes were
     * read as "WINDOWS-1252" text (one such char may need more than one byte).
     *
     * The "WINDOWS-1252" form of the UTF-8 BOM is written with byte escapes so
     * that the key cannot get lost or stripped by editors and tools.
     *
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
     *
     * @var array
     */
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
32
33
    /**
34
     * Numeric code point => UTF-8 Character
35
     *
36
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
37
     *
38
     * @var array
39
     */
40
    private static $WHITESPACE = [
41
        // NUL Byte
42
        0 => "\x0",
43
        // Tab
44
        9 => "\x9",
45
        // New Line
46
        10 => "\xa",
47
        // Vertical Tab
48
        11 => "\xb",
49
        // Carriage Return
50
        13 => "\xd",
51
        // Ordinary Space
52
        32 => "\x20",
53
        // NO-BREAK SPACE
54
        160 => "\xc2\xa0",
55
        // OGHAM SPACE MARK
56
        5760 => "\xe1\x9a\x80",
57
        // MONGOLIAN VOWEL SEPARATOR
58
        6158 => "\xe1\xa0\x8e",
59
        // EN QUAD
60
        8192 => "\xe2\x80\x80",
61
        // EM QUAD
62
        8193 => "\xe2\x80\x81",
63
        // EN SPACE
64
        8194 => "\xe2\x80\x82",
65
        // EM SPACE
66
        8195 => "\xe2\x80\x83",
67
        // THREE-PER-EM SPACE
68
        8196 => "\xe2\x80\x84",
69
        // FOUR-PER-EM SPACE
70
        8197 => "\xe2\x80\x85",
71
        // SIX-PER-EM SPACE
72
        8198 => "\xe2\x80\x86",
73
        // FIGURE SPACE
74
        8199 => "\xe2\x80\x87",
75
        // PUNCTUATION SPACE
76
        8200 => "\xe2\x80\x88",
77
        // THIN SPACE
78
        8201 => "\xe2\x80\x89",
79
        // HAIR SPACE
80
        8202 => "\xe2\x80\x8a",
81
        // LINE SEPARATOR
82
        8232 => "\xe2\x80\xa8",
83
        // PARAGRAPH SEPARATOR
84
        8233 => "\xe2\x80\xa9",
85
        // NARROW NO-BREAK SPACE
86
        8239 => "\xe2\x80\xaf",
87
        // MEDIUM MATHEMATICAL SPACE
88
        8287 => "\xe2\x81\x9f",
89
        // IDEOGRAPHIC SPACE
90
        12288 => "\xe3\x80\x80",
91
    ];
92
93
    /**
94
     * @var array
95
     */
96
    private static $WHITESPACE_TABLE = [
97
        'SPACE'                     => "\x20",
98
        'NO-BREAK SPACE'            => "\xc2\xa0",
99
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
100
        'EN QUAD'                   => "\xe2\x80\x80",
101
        'EM QUAD'                   => "\xe2\x80\x81",
102
        'EN SPACE'                  => "\xe2\x80\x82",
103
        'EM SPACE'                  => "\xe2\x80\x83",
104
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
105
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
106
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
107
        'FIGURE SPACE'              => "\xe2\x80\x87",
108
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
109
        'THIN SPACE'                => "\xe2\x80\x89",
110
        'HAIR SPACE'                => "\xe2\x80\x8a",
111
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
112
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
113
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
114
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
115
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
116
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
117
    ];
118
119
    /**
120
     * bidirectional text chars
121
     *
122
     * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
123
     *
124
     * @var array
125
     */
126
    private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
127
        // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
128
        8234 => "\xE2\x80\xAA",
129
        // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
130
        8235 => "\xE2\x80\xAB",
131
        // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
132
        8236 => "\xE2\x80\xAC",
133
        // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
134
        8237 => "\xE2\x80\xAD",
135
        // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
136
        8238 => "\xE2\x80\xAE",
137
        // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
138
        8294 => "\xE2\x81\xA6",
139
        // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
140
        8295 => "\xE2\x81\xA7",
141
        // FIRST STRONG ISOLATE // (use -> dir = "auto")
142
        8296 => "\xE2\x81\xA8",
143
        // POP DIRECTIONAL ISOLATE
144
        8297 => "\xE2\x81\xA9",
145
    ];
146
147
    /**
148
     * @var array
149
     */
150
    private static $COMMON_CASE_FOLD = [
151
        'upper' => [
152
            'µ',
153
            'ſ',
154
            "\xCD\x85",
155
            'ς',
156
            'ẞ',
157
            "\xCF\x90",
158
            "\xCF\x91",
159
            "\xCF\x95",
160
            "\xCF\x96",
161
            "\xCF\xB0",
162
            "\xCF\xB1",
163
            "\xCF\xB5",
164
            "\xE1\xBA\x9B",
165
            "\xE1\xBE\xBE",
166
        ],
167
        'lower' => [
168
            'μ',
169
            's',
170
            'ι',
171
            'σ',
172
            'ß',
173
            'β',
174
            'θ',
175
            'φ',
176
            'π',
177
            'κ',
178
            'ρ',
179
            'ε',
180
            "\xE1\xB9\xA1",
181
            'ι',
182
        ],
183
    ];
184
185
    /**
186
     * @var array
187
     */
188
    private static $SUPPORT = [];
189
190
    /**
191
     * @var array|null
192
     */
193
    private static $BROKEN_UTF8_FIX;
194
195
    /**
196
     * @var array|null
197
     */
198
    private static $WIN1252_TO_UTF8;
199
200
    /**
201
     * @var array|null
202
     */
203
    private static $INTL_TRANSLITERATOR_LIST;
204
205
    /**
206
     * @var array|null
207
     */
208
    private static $ENCODINGS;
209
210
    /**
211
     * @var array|null
212
     */
213
    private static $ORD;
214
215
    /**
216
     * @var array|null
217
     */
218
    private static $EMOJI;
219
220
    /**
221
     * @var array|null
222
     */
223
    private static $EMOJI_VALUES_CACHE;
224
225
    /**
226
     * @var array|null
227
     */
228
    private static $EMOJI_KEYS_CACHE;
229
230
    /**
231
     * @var array|null
232
     */
233
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
234
235
    /**
236
     * @var array|null
237
     */
238
    private static $CHR;
239
240
    /**
     * Creates an instance of the helper.
     *
     * The constructor is intentionally empty: it takes no arguments and
     * performs no initialisation.
     */
    public function __construct()
    {
    }
246
247
    /**
     * Fetches a single character by position, similar to $str[1] but aware of
     * multi-byte characters.
     *
     * An empty input string or a negative position yields an empty string.
     *
     * @param string $str      <p>A UTF-8 string.</p>
     * @param int    $pos      <p>Zero-based position of the character to return.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string single multi-byte character, or an empty string
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        $nothingToAccess = $pos < 0 || $str === '';
        if ($nothingToAccess) {
            return '';
        }

        // "UTF-8" goes straight to mbstring, any other charset is delegated to self::substr()
        return $encoding === 'UTF-8'
            ? (string) \mb_substr($str, $pos, 1)
            : (string) self::substr($str, $pos, 1, $encoding);
    }
268
269
    /**
     * Makes sure the string starts with a byte order mark.
     *
     * The result of self::bom() is prepended unless self::string_has_bom()
     * already reports a BOM, in which case the input is returned unchanged.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the string including a leading BOM
     */
    public static function add_bom_to_string(string $str): string
    {
        if (self::string_has_bom($str) !== false) {
            return $str;
        }

        return self::bom() . $str;
    }
286
287
    /**
     * Changes the case of all keys in an array.
     *
     * The values are copied over untouched. If two keys become identical after
     * the case conversion, the value of the later key overwrites the earlier one.
     *
     * @param array  $array    <p>The array to work on</p>
     * @param int    $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                         or <strong>CASE_LOWER</strong> (default); any other value is
     *                         treated like <strong>CASE_LOWER</strong></p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return array an array with its keys lower or uppercased
     */
    public static function array_change_key_case(array $array, int $case = \CASE_LOWER, string $encoding = 'UTF-8'): array
    {
        if (
            $case !== \CASE_LOWER
            &&
            $case !== \CASE_UPPER
        ) {
            $case = \CASE_LOWER;
        }

        $return = [];
        // iterate by value: the values are only read, a reference is not needed
        foreach ($array as $key => $value) {
            $key = $case === \CASE_LOWER
                ? self::strtolower((string) $key, $encoding)
                : self::strtoupper((string) $key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
318
319
    /**
     * Extracts the text located between two delimiters.
     *
     * The search for $start begins at $offset, the search for $end begins
     * directly behind the found $start delimiter. An empty string is returned
     * if one of the delimiters is not found or if nothing lies between them.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        // "UTF-8" is handled with the plain "mb_" functions
        if ($encoding === 'UTF-8') {
            $startPos = \mb_strpos($str, $start, $offset);
            if ($startPos === false) {
                return '';
            }

            // first character behind the start delimiter
            $from = $startPos + (int) \mb_strlen($start);

            $endPos = \mb_strpos($str, $end, $from);
            if ($endPos === false) {
                return '';
            }
            if ($endPos === $from) {
                return '';
            }

            return (string) \mb_substr($str, $from, $endPos - $from);
        }

        // every other charset is normalized and handled by the helpers of this class
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $startPos = self::strpos($str, $start, $offset, $encoding);
        if ($startPos === false) {
            return '';
        }

        $from = $startPos + (int) self::strlen($start, $encoding);

        $endPos = self::strpos($str, $end, $from, $encoding);
        if ($endPos === false) {
            return '';
        }
        if ($endPos === $from) {
            return '';
        }

        return (string) self::substr($str, $from, $endPos - $from, $encoding);
    }
382
383
    /**
     * Converts a string of binary digits ("0" / "1") back into the bytes it represents.
     *
     * The digits are decoded byte by byte, so inputs of any length are handled
     * and the result does not depend on number precision. Leading zero bits are
     * ignored, characters other than "0" and "1" are skipped.
     *
     * An empty string is returned for non-string input, for empty input and
     * for input that contains no "1" bit.
     *
     * @param mixed $bin 1|0
     *
     * @return string
     */
    public static function binary_to_str($bin): string
    {
        if (!\is_string($bin) || $bin === '') {
            return '';
        }

        // keep the binary digits only and drop the leading zero bits
        $bits = \ltrim((string) \preg_replace('/[^01]/', '', $bin), '0');
        if ($bits === '') {
            return '';
        }

        // left-pad to whole bytes, so that e.g. "1010" becomes "\x0a" and not "\xa0"
        $missingBits = (8 - \strlen($bits) % 8) % 8;
        $bits = \str_repeat('0', $missingBits) . $bits;

        $str = '';
        foreach (\str_split($bits, 8) as $byte) {
            $str .= \chr((int) \bindec($byte));
        }

        return $str;
    }
403
404
    /**
     * Returns the byte order mark of UTF-8 (the three bytes EF BB BF).
     *
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
     *
     * @return string UTF-8 Byte Order Mark
     */
    public static function bom(): string
    {
        return "\xef\xbb\xbf";
    }
415
416
    /**
     * Alias of UTF8::chr_map(): both arguments are passed through unchanged.
     *
     * @see   UTF8::chr_map()
     *
     * @param array|string $callback
     * @param string       $str
     *
     * @return string[]
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
430
431
    /**
     * Picks the single character located at $index (the first character has index 0).
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @return string the character at $index
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // anything but "UTF-8" is delegated to self::substr()
        if ($encoding !== 'UTF-8') {
            return (string) self::substr($str, $index, 1, $encoding);
        }

        return (string) \mb_substr($str, $index, 1);
    }
448
449
    /**
     * Splits the string into its characters by delegating to self::str_split().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string[] an array of chars
     */
    public static function chars(string $str): array
    {
        $characters = self::str_split($str);

        return $characters;
    }
460
461
    /**
     * Detects once which UTF-8 related features are available and stores the
     * result in self::$SUPPORT.
     *
     * The first call fills the support table and returns true; later calls
     * find the "already checked" marker and return null without doing anything.
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     *
     * @return true|null true if the detection was executed by this call, otherwise null
     */
    public static function checkForSupport()
    {
        // the detection already took place
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            // switch mbstring to "UTF-8" and remember that this was done
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        // with the polyfill in use the internal encoding is set to "UTF-8" as well
        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
516
517
    /**
     * Builds the character that belongs to a code point.
     *
     * Code points up to 127 are read from the self::$CHR table. Larger code
     * points are created via "IntlChar" if self::$SUPPORT reports it as
     * available, otherwise the UTF-8 bytes are assembled by hand. If an
     * encoding other than "UTF-8" is requested, the character is finally passed
     * through self::encode(). Results are remembered per code point and
     * encoding for the rest of the request.
     *
     * INFO: opposite to UTF8::ord()
     *
     * @param int|string $code_point <p>The code point for which to generate a character.</p>
     * @param string     $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @return string|null multi-byte character, returns null on failure or empty input
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // already generated characters, the key is "<code point><encoding>"
        static $memo = [];

        // "UTF-8" and "CP850" are taken as they are, other names are normalized first
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (
            !\in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        $memoKey = $code_point . $encoding;
        if (isset($memo[$memoKey])) {
            return $memo[$memoKey];
        }

        if ($code_point <= 127) {
            // use "simple"-char only until "\x80"
            if (self::$CHR === null) {
                self::$CHR = (array) self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $char = self::$CHR[$code_point];
        } elseif (self::$SUPPORT['intlChar'] === true) {
            // fallback via "IntlChar"
            /** @noinspection PhpComposerExtensionStubsInspection */
            $char = \IntlChar::chr($code_point);
        } else {
            // fallback via vanilla php: assemble the UTF-8 bytes by hand,
            // self::$CHR is used as lookup table (presumably byte value => byte)
            if (self::$CHR === null) {
                self::$CHR = (array) self::getData('chr');
            }

            $code_point = (int) $code_point;

            // the low six bits always end up in the last byte of a multi-byte sequence
            $lastByte = ($code_point & 0x3F) + 0x80;

            if ($code_point <= 0x7F) {
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $char = self::$CHR[$code_point];
            } elseif ($code_point <= 0x7FF) {
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $char = self::$CHR[($code_point >> 6) + 0xC0]
                        . self::$CHR[$lastByte];
            } elseif ($code_point <= 0xFFFF) {
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $char = self::$CHR[($code_point >> 12) + 0xE0]
                        . self::$CHR[(($code_point >> 6) & 0x3F) + 0x80]
                        . self::$CHR[$lastByte];
            } else {
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $char = self::$CHR[($code_point >> 18) + 0xF0]
                        . self::$CHR[(($code_point >> 12) & 0x3F) + 0x80]
                        . self::$CHR[(($code_point >> 6) & 0x3F) + 0x80]
                        . self::$CHR[$lastByte];
            }
        }

        // the character was built as "UTF-8", convert it if something else was requested
        if ($encoding !== 'UTF-8') {
            $char = self::encode($encoding, $char);
        }

        return $memo[$memoKey] = $char;
    }
629
630
    /**
     * Runs the callback on every character of the string.
     *
     * The string is split via self::str_split() and the pieces are handed to
     * array_map() together with the callback.
     *
     * @param array|string $callback <p>The callback function.</p>
     * @param string       $str      <p>UTF-8 string to run callback on.</p>
     *
     * @return string[] the outcome of callback
     */
    public static function chr_map($callback, string $str): array
    {
        $characters = self::str_split($str);

        return \array_map($callback, $characters);
    }
645
646
    /**
     * Determines the length in bytes of every character of a Unicode string.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @return int[] an array of byte lengths of each character, empty for an empty string
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        // with "mbstring.func_overload" the byte length is measured via "mb_strlen" in an 8-bit charset
        $byteLength = self::$SUPPORT['mbstring_func_overload'] === true
            ? static function (string $data): int {
                // "mb_" is available if overload is used, so use it ...
                return \mb_strlen($data, 'CP850'); // 8-BIT
            }
            : '\strlen';

        return \array_map($byteLength, self::str_split($str));
    }
676
677
    /**
     * Get a decimal code representation of a specific character.
     *
     * The first byte decides how many bytes belong to the character (1 to 4),
     * the payload bits of all those bytes are then combined into one number.
     * Only the first character of $char is evaluated.
     *
     * @param string $char <p>The input character.</p>
     *
     * @return int the decimal code, 0 for an empty string
     */
    public static function chr_to_decimal(string $char): int
    {
        // nothing to decode, and $char[0] would trigger an "Uninitialized string offset" error
        if ($char === '') {
            return 0;
        }

        $code = self::ord($char[0]);
        $bytes = 1;

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        // the lead byte tells the length of the sequence, its marker bits are removed
        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        // append the payload bits of every following byte
        for ($i = 2; $i <= $bytes; ++$i) {
            // 10xxxxxx
            $code = ($code << 6) + (self::ord($char[$i - 1]) & ~0x80);
        }

        return $code;
    }
715
716
    /**
     * Get the hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * An empty input gives an empty result. The entity "&#0;" is replaced by an
     * empty string before it is passed to UTF8::ord().
     *
     * @param int|string $char <p>The input character</p>
     * @param string     $pfix [optional] <p>Prefix passed on to UTF8::int_to_hex().</p>
     *
     * @return string <p>The code point encoded as U+xxxx</p>
     */
    public static function chr_to_hex($char, string $pfix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        $chr = $char === '&#0;' ? '' : $char;
        $codePoint = self::ord((string) $chr);

        return self::int_to_hex($codePoint, $pfix);
    }
736
737
    /**
     * Alias of "UTF8::chr_to_decimal()".
     *
     * @see UTF8::chr_to_decimal()
     *
     * @param string $chr <p>The input character.</p>
     *
     * @return int <p>Whatever UTF8::chr_to_decimal() returns for $chr.</p>
     */
    public static function chr_to_int(string $chr): int
    {
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
750
751
    /**
     * Splits a string into smaller chunks and joins them with the given line ending.
     *
     * The chunks come from UTF8::str_split() and are glued together with $end,
     * so $end sits between the chunks and is not appended after the last one.
     *
     * @param string $body     <p>The original string to be split.</p>
     * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end      [optional] <p>The character(s) placed between two chunks.</p>
     *
     * @return string <p>The chunked string.</p>
     */
    public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunklen);

        return \implode($end, $chunks);
    }
764
765
    /**
     * Strips every byte that is not part of a well-formed UTF-8 sequence and applies
     * the optional clean-up steps.
     *
     * Order of the steps: invalid bytes, diamond question mark, invisible characters,
     * whitespace, MS Word characters, BOM.
     *
     * @param string $str                           <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
     * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
     *                                              whitespace.</p>
     * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
     *                                              e.g.: "…" => "..."</p>
     * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces; only
     *                                              used together with $normalize_whitespace.</p>
     * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove the diamond
     *                                              question mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove
     *                                              invisible characters e.g.: "\0"</p>
     *
     * @return string <p>Clean UTF-8 encoded string.</p>
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // group 1 collects runs of valid sequences; groups 2 + 3 catch single stray bytes
        $regx = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';

        // only group 1 is written back, so every stray byte disappears
        $cleaned = (string) \preg_replace($regx, '$1', $str);

        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned, '');
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        if ($remove_bom) {
            $cleaned = self::remove_bom($cleaned);
        }

        return $cleaned;
    }
832
833
    /**
     * Cleans up a string so that only printable UTF-8 chars remain, and fixes the UTF-8 encoding.
     *
     * The input is cast to string, run through UTF8::fix_simple_utf8() and then through
     * UTF8::clean() with a fixed set of options (see the flags in the method body).
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string <p>The cleaned string, or an empty string for empty input.</p>
     */
    public static function cleanup($str): string
    {
        $input = (string) $str;

        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $fixed = self::fix_simple_utf8($input);

        // remove all none UTF-8 symbols, with these extras:
        $removeBom = true;
        $normalizeWhitespace = true;
        $normalizeMsWord = false;
        $keepNonBreakingSpace = true;
        $removeDiamondQuestionMark = true; // (�)
        $removeInvisibleCharacters = true; // (e.g. "\0")

        return self::clean(
            $fixed,
            $removeBom,
            $normalizeWhitespace,
            $normalizeMsWord,
            $keepNonBreakingSpace,
            $removeDiamondQuestionMark,
            $removeInvisibleCharacters
        );
    }
867
868
    /**
     * Converts a string (or an array of strings) into an array of Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * A string argument is split with UTF8::str_split() first; every element is then
     * mapped through UTF8::ord() and, for $u_style, through UTF8::int_to_hex().
     *
     * @param string|string[] $arg     <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $u_style <p>If True, will return code points in U+xxxx format,
     *                                 default, code points will be returned as integers.</p>
     *
     * @return array<int|string>
     *                           The array of code points:<br>
     *                           array<int> for $u_style === false<br>
     *                           array<string> for $u_style === true<br>
     */
    public static function codepoints($arg, bool $u_style = false): array
    {
        if (\is_string($arg)) {
            $arg = self::str_split($arg);
        }

        $codePoints = \array_map([self::class, 'ord'], $arg);

        if ($codePoints === []) {
            return [];
        }

        if (!$u_style) {
            return $codePoints;
        }

        return \array_map([self::class, 'int_to_hex'], $codePoints);
    }
912
913
    /**
     * Trims the string and squeezes every run of whitespace into a single space.
     * This includes tabs and newline characters, as well as multibyte whitespace
     * such as the thin space and ideographic space.
     *
     * Uses "mb_ereg_replace" when mbstring is flagged as supported, otherwise
     * UTF8::regex_replace().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string <p>The trimmed string with condensed whitespace.</p>
     */
    public static function collapse_whitespace(string $str): string
    {
        $pattern = '[[:space:]]+';

        if (self::$SUPPORT['mbstring'] !== true) {
            $collapsed = self::regex_replace($str, $pattern, ' ');
        } else {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $collapsed = (string) \mb_ereg_replace($pattern, ' ', $str);
        }

        return \trim($collapsed);
    }
931
932
    /**
     * Counts how often every character occurs in a string.
     *
     * @param string $str                <p>The input string.</p>
     * @param bool   $cleanUtf8          [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool   $tryToUseMbFunction [optional] <p>Passed on to UTF8::str_split(); set to false, if you
     *                                   don't want to use the "mb_" functions there.</p>
     *
     * @return int[] <p>An associative array of characters as keys and their count as values.</p>
     */
    public static function count_chars(
        string $str,
        bool $cleanUtf8 = false,
        bool $tryToUseMbFunction = true
    ): array {
        // one array element per character (chunk length 1)
        $chars = self::str_split($str, 1, $cleanUtf8, $tryToUseMbFunction);

        return \array_count_values($chars);
    }
956
957
    /**
     * Removes css media-queries (the "@media ... { ... }" blocks) from a stylesheet string.
     *
     * @param string $str <p>The css input.</p>
     *
     * @return string <p>The css without the matched media-query blocks.</p>
     */
    public static function css_stripe_media_queries(string $str): string
    {
        // ungreedy ("U") match of "@media <query> { <rules> }", including the closing brace of the block
        $mediaQueryPattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $stripped = \preg_replace($mediaQueryPattern, '', $str);

        return (string) $stripped;
    }
972
973
    /**
     * Checks whether the "ctype" extension is loaded on the server.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function ctype_loaded(): bool
    {
        $isLoaded = \extension_loaded('ctype');

        return $isLoaded;
    }
983
984
    /**
     * Converts an int-value into a UTF-8 character.
     *
     * The value is wrapped into a numeric HTML entity ("&#<int>;") which is then
     * decoded by UTF8::html_entity_decode().
     *
     * @param mixed $int <p>The code point; it is concatenated into the entity as-is.</p>
     *
     * @return string <p>The decoded entity.</p>
     */
    public static function decimal_to_chr($int): string
    {
        $entity = '&#' . $int . ';';

        return self::html_entity_decode($entity, \ENT_QUOTES | \ENT_HTML5);
    }
995
996
    /**
     * Decodes a MIME header field.
     *
     * "iconv_mime_decode" is used when iconv is flagged as supported; otherwise the
     * string is (for a non UTF-8 charset) passed through UTF8::encode() and then
     * decoded with "mb_decode_mimeheader".
     *
     * @param string $str      <p>The encoded header.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      A decoded MIME field on success,
     *                      or false if an error occurs during the decoding
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (self::$SUPPORT['iconv'] === true) {
            return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
        }

        $header = $encoding === 'UTF-8' ? $str : self::encode($encoding, $str);

        return \mb_decode_mimeheader($header);
    }
1022
1023
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  The different to "UTF8::utf8_encode()" is that this function, try to fix also broken / double encoding,
     *        so you can call this function also on a UTF-8 String and you don't mess the string.
     *
     * Besides real charsets, the pseudo encodings 'JSON', 'BASE64' and 'HTML-ENTITIES' are handled,
     * both as target and as source.
     *
     * @param string $toEncoding             <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                    <p>The input string</p>
     * @param bool   $autodetectFromEncoding [optional] <p>If true, the current encoding is always detected from the
     *                                       string and a detected value replaces $fromEncoding; if nothing can be
     *                                       detected, the result of UTF8::to_utf8() is returned.<br>
     *                                       If false, detection only runs when $fromEncoding is empty.</p>
     * @param string $fromEncoding           [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                       A empty string will trigger the autodetect anyway.</p>
     *
     * @throws \InvalidArgumentException if $toEncoding is 'JSON' and the string can not be json-encoded
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $toEncoding,
        string $str,
        bool $autodetectFromEncoding = true,
        string $fromEncoding = ''
    ): string {
        if ($toEncoding === '' || $str === '') {
            return $str;
        }

        // the two most common names skip the normalization
        if (!\in_array($toEncoding, ['UTF-8', 'CP850'], true)) {
            $toEncoding = self::normalize_encoding($toEncoding, 'UTF-8');
        }
        if ($fromEncoding && !\in_array($fromEncoding, ['UTF-8', 'CP850'], true)) {
            $fromEncoding = self::normalize_encoding($fromEncoding, null);
        }

        // nothing to do if source and target are already known to be the same
        if ($toEncoding && $fromEncoding && $toEncoding === $fromEncoding) {
            return $str;
        }

        // pseudo encoding: JSON
        if ($toEncoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }
        if ($fromEncoding === 'JSON') {
            $str = self::json_decode($str);
            $fromEncoding = '';
        }

        // pseudo encoding: BASE64
        if ($toEncoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($fromEncoding === 'BASE64') {
            $str = \base64_decode($str, true);
            $fromEncoding = '';
        }

        // pseudo encoding: HTML-ENTITIES
        if ($toEncoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true, 'UTF-8');
        }
        if ($fromEncoding === 'HTML-ENTITIES') {
            $str = self::html_decode($str, \ENT_COMPAT, 'UTF-8');
            $fromEncoding = '';
        }

        // detect the source encoding, if requested or if there is no usable hint
        $detectedEncoding = false;
        if ($autodetectFromEncoding || !$fromEncoding) {
            $detectedEncoding = self::str_detect_encoding($str);
        }

        // DEBUG
        //var_dump($toEncoding, $fromEncoding, $detectedEncoding, $str, "\n\n");

        if ($detectedEncoding !== false) {
            $fromEncoding = $detectedEncoding;
        } elseif ($autodetectFromEncoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (!$fromEncoding || $fromEncoding === $toEncoding) {
            return $str;
        }

        // shortcuts that are handled by the class itself
        if (
            $toEncoding === 'UTF-8'
            &&
            \in_array($fromEncoding, ['WINDOWS-1252', 'ISO-8859-1'], true)
        ) {
            return self::to_utf8($str);
        }

        if (
            $toEncoding === 'ISO-8859-1'
            &&
            \in_array($fromEncoding, ['WINDOWS-1252', 'UTF-8'], true)
        ) {
            return self::to_iso8859($str);
        }

        $hasMbString = self::$SUPPORT['mbstring'];

        if (
            $hasMbString === false
            &&
            !\in_array($toEncoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
        ) {
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', \E_USER_WARNING);
        }

        if ($hasMbString === true) {
            // warning: do not use the symfony polyfill here
            $converted = \mb_convert_encoding($str, $toEncoding, $fromEncoding);

            if ($converted) {
                return $converted;
            }
        }

        // last resort: iconv, and the unchanged string if that fails as well
        $viaIconv = \iconv($fromEncoding, $toEncoding, $str);

        return $viaIconv !== false ? $viaIconv : $str;
    }
1181
1182
    /**
     * Encodes a string as MIME header value via "iconv_mime_encode" (with an empty field name).
     *
     * @param string $str              <p>The value to encode.</p>
     * @param string $fromCharset      [optional] <p>Set the input charset.</p>
     * @param string $toCharset        [optional] <p>Set the output charset.</p>
     * @param string $transferEncoding [optional] <p>Set the transfer encoding.</p>
     * @param string $linefeed         [optional] <p>Set the used linefeed.</p>
     * @param int    $indent           [optional] <p>Set the max length indent.</p>
     *
     * @return false|string
     *                      An encoded MIME field on success,
     *                      or false if an error occurs during the encoding
     */
    public static function encode_mimeheader(
        $str,
        $fromCharset = 'UTF-8',
        $toCharset = 'UTF-8',
        $transferEncoding = 'Q',
        $linefeed = "\r\n",
        $indent = 76
    ) {
        // charset names other than the two common ones are normalized first
        $knownCharsets = ['UTF-8', 'CP850'];

        if (!\in_array($fromCharset, $knownCharsets, true)) {
            $fromCharset = self::normalize_encoding($fromCharset, 'UTF-8');
        }

        if (!\in_array($toCharset, $knownCharsets, true)) {
            $toCharset = self::normalize_encoding($toCharset, 'UTF-8');
        }

        $preferences = [
            'scheme'           => $transferEncoding,
            'line-length'      => $indent,
            'input-charset'    => $fromCharset,
            'output-charset'   => $toCharset,
            'line-break-chars' => $linefeed,
        ];

        return \iconv_mime_encode('', $str, $preferences);
    }
1222
1223
    /**
     * Create an extract from a sentence; if the search-string was found, the extract is built around it.
     *
     * The extract is cut at the next space or period, and skipped text is marked with
     * $replacerForSkippedText. Without a search-string the extract starts at the beginning of the string.
     *
     * @param string   $str                    <p>The input string.</p>
     * @param string   $search                 <p>The searched string.</p>
     * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
     * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacerForSkippedText = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $isUtf8 = $encoding === 'UTF-8';
        $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        // "UTF-8" goes straight to the "mb_*" functions, every other charset to the class wrappers
        $lengthOf = static function ($text) use ($isUtf8, $encoding) {
            return $isUtf8 ? (int) \mb_strlen($text) : (int) self::strlen($text, $encoding);
        };
        $positionOf = static function ($needle, $from) use ($str, $isUtf8, $encoding) {
            return $isUtf8 ? \mb_strpos($str, $needle, $from) : self::strpos($str, $needle, $from, $encoding);
        };
        $slice = static function ($start, $count) use ($str, $isUtf8, $encoding) {
            return $isUtf8 ? \mb_substr($str, $start, $count) : self::substr($str, $start, $count, $encoding);
        };

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2, 0);
        }

        // --- no search-string: cut the beginning of the text ---

        if ($search === '') {
            $searchFrom = 0;
            if ($length > 0) {
                $searchFrom = \min($length - 1, $lengthOf($str));
            }

            $cutAt = (int) \min(
                $positionOf(' ', $searchFrom),
                $positionOf('.', $searchFrom)
            );

            if (!$cutAt) {
                return $str;
            }

            $head = $slice(0, $cutAt);
            if ($head === false) {
                return '';
            }

            return \rtrim($head, $trimChars) . $replacerForSkippedText;
        }

        // --- with search-string: find the window around the first hit ---

        if ($isUtf8) {
            $wordPos = (int) \mb_stripos($str, $search);
        } else {
            $wordPos = (int) self::stripos($str, $search, 0, $encoding);
        }
        $halfSide = (int) ($wordPos - $length / 2 + $lengthOf($search) / 2);

        // move the start back to the last space / period before the window
        $startPos = 0;
        if ($halfSide > 0) {
            $halfText = $slice(0, $halfSide);
            if ($halfText !== false) {
                if ($isUtf8) {
                    $startPos = (int) \max(
                        \mb_strrpos($halfText, ' '),
                        \mb_strrpos($halfText, '.')
                    );
                } else {
                    $startPos = (int) \max(
                        self::strrpos($halfText, ' ', 0, $encoding),
                        self::strrpos($halfText, '.', 0, $encoding)
                    );
                }
            }
        }

        if ($wordPos !== 0 && $halfSide > 0) {
            $offset = \min($startPos + $length - 1, (int) self::strlen($str, $encoding));

            $extractLength = (int) \min(
                $positionOf(' ', $offset),
                $positionOf('.', $offset)
            ) - $startPos;

            if ($extractLength <= 0) {
                // no usable end position: take everything from the start position on
                $tail = $slice($startPos, $lengthOf($str));

                return $tail !== false
                    ? $replacerForSkippedText . \ltrim($tail, $trimChars)
                    : '';
            }

            $middle = $slice($startPos, $extractLength);

            return $middle !== false
                ? $replacerForSkippedText . \trim($middle, $trimChars) . $replacerForSkippedText
                : '';
        }

        // hit at position 0 / nothing found / window starts before the string: cut the beginning
        $offset = \min($length - 1, (int) self::strlen($str, $encoding));

        $cutAt = (int) \min(
            $positionOf(' ', $offset),
            $positionOf('.', $offset)
        );

        if (!$cutAt) {
            return $str;
        }

        $head = $slice(0, $cutAt);

        return $head !== false
            ? \rtrim($head, $trimChars) . $replacerForSkippedText
            : '';
    }
1410
1411
    /**
     * Reads entire file into a string.
     *
     * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read; it is passed through
     *                                        "filter_var(..., FILTER_SANITIZE_STRING)" first.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Passed on to "file_get_contents" as second argument
     *                                        (include path search).
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with
     *                                        stream_context_create. If it is null and $timeout is set,
     *                                        a context with a "http" timeout is created.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts (null is treated as 0).
     *                                        </p>
     * @param int|null      $maxLength        [optional] <p>
     *                                        Maximum length of data read. The default is to read until end
     *                                        of file is reached.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
     * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Data that is
     *                                        detected as binary (and not as UTF-16 / UTF-32) is returned
     *                                        unconverted.</p>
     * @param string        $fromEncoding     [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        A empty string will trigger the autodetect anyway.</p>
     *
     * @return false|string the function returns the read data as string or <b>false</b> on failure
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $maxLength = null,
        int $timeout = 10,
        bool $convertToUtf8 = true,
        string $fromEncoding = ''
    ) {
        $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);
        if ($filename === false) {
            return false;
        }

        // no context given: build one that only carries the (http-)timeout
        if ($context === null && $timeout) {
            $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
        }

        $offset = $offset ?? 0;

        // the length argument is only passed when a real limit was requested
        $data = \is_int($maxLength)
            ? \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength)
            : \file_get_contents($filename, $use_include_path, $context, $offset);

        // return false on error
        if ($data === false) {
            return false;
        }

        if (!$convertToUtf8) {
            return $data;
        }

        $isPlainBinary = self::is_binary($data, true) === true
                         &&
                         self::is_utf16($data, false) === false
                         &&
                         self::is_utf32($data, false) === false;

        if ($isPlainBinary) {
            // do nothing, it's binary and not UTF16 or UTF32
            return $data;
        }

        $data = self::encode('UTF-8', $data, false, $fromEncoding);

        return self::cleanup($data);
    }
1508
1509
    /**
     * Tells whether the content of a file begins with a BOM (Byte Order Mark).
     *
     * The whole file is read with the native file_get_contents() and the result
     * is handed to "UTF8::string_has_bom()".
     *
     * @param string $file_path <p>Path to a readable file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
     */
    public static function file_has_bom(string $file_path): bool
    {
        $contents = \file_get_contents($file_path);

        if ($contents !== false) {
            return self::string_has_bom($contents);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1528
1529
    /**
     * Normalizes strings (and, recursively, the members of arrays / objects) to the given Unicode normalization form.
     *
     * Strings containing "\r" get their line endings normalized first. Non-ASCII strings that are not
     * yet in the requested form are run through \Normalizer::normalize(); when that yields nothing
     * usable the string is passed to "UTF8::encode('UTF-8', ...)" instead. A string that starts with a
     * non-spacing mark is prefixed with $leading_combining. Every other type is returned unchanged.
     *
     * @param mixed  $var                <p>String, array, object or any other value.</p>
     * @param int    $normalization_form [optional] <p>One of the \Normalizer form constants. Default: NFC</p>
     * @param string $leading_combining  [optional] <p>Prefix placed in front of a leading combining character.</p>
     *
     * @return mixed the input with all contained strings filtered
     */
    public static function filter($var, int $normalization_form = \Normalizer::NFC, string $leading_combining = '◌')
    {
        $kind = \gettype($var);

        // Containers: filter every member in place.
        if ($kind === 'array' || $kind === 'object') {
            foreach ($var as &$member) {
                $member = self::filter($member, $normalization_form, $leading_combining);
            }
            unset($member);

            return $var;
        }

        if ($kind !== 'string') {
            return $var;
        }

        if (\strpos($var, "\r") !== false) {
            // Workaround https://bugs.php.net/65732
            $var = self::normalize_line_ending($var);
        }

        // Pure ASCII needs no normalization at all.
        if (self::is_ascii($var) !== false) {
            return $var;
        }

        if (\Normalizer::isNormalized($var, $normalization_form)) {
            // Non-empty marker so that the combining-char check below still runs.
            $normalized = '-';
        } else {
            $normalized = \Normalizer::normalize($var, $normalization_form);

            $var = isset($normalized[0])
                ? $normalized
                : self::encode('UTF-8', $var, true);
        }

        $startsWithCombiningMark = $var[0] >= "\x80"
                                   &&
                                   isset($normalized[0], $leading_combining[0])
                                   &&
                                   \preg_match('/^\p{Mn}/u', $var);

        if ($startsWithCombiningMark) {
            // Prevent leading combining chars
            // for NFC-safe concatenations.
            $var = $leading_combining . $var;
        }

        return $var;
    }
1593
1594
    /**
1595
     * "filter_input()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1596
     *
1597
     * Gets a specific external variable by name and optionally filters it
1598
     *
1599
     * @see  http://php.net/manual/en/function.filter-input.php
1600
     *
1601
     * @param int    $type          <p>
1602
     *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1603
     *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1604
     *                              <b>INPUT_ENV</b>.
1605
     *                              </p>
1606
     * @param string $variable_name <p>
1607
     *                              Name of a variable to get.
1608
     *                              </p>
1609
     * @param int    $filter        [optional] <p>
1610
     *                              The ID of the filter to apply. The
1611
     *                              manual page lists the available filters.
1612
     *                              </p>
1613
     * @param mixed  $options       [optional] <p>
1614
     *                              Associative array of options or bitwise disjunction of flags. If filter
1615
     *                              accepts options, flags can be provided in "flags" field of array.
1616
     *                              </p>
1617
     *
1618
     * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
1619
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1620
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1621
     */
1622
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // Forward $options only when the caller really passed it, so the native
        // function keeps its own default for that argument.
        $raw = \func_num_args() >= 4
            ? \filter_input($type, $variable_name, $filter, $options)
            : \filter_input($type, $variable_name, $filter);

        return self::filter($raw);
    }
1636
1637
    /**
1638
     * "filter_input_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1639
     *
1640
     * Gets external variables and optionally filters them
1641
     *
1642
     * @see  http://php.net/manual/en/function.filter-input-array.php
1643
     *
1644
     * @param int   $type       <p>
1645
     *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1646
     *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1647
     *                          <b>INPUT_ENV</b>.
1648
     *                          </p>
1649
     * @param mixed $definition [optional] <p>
1650
     *                          An array defining the arguments. A valid key is a string
1651
     *                          containing a variable name and a valid value is either a filter type, or an array
1652
     *                          optionally specifying the filter, flags and options. If the value is an
1653
     *                          array, valid keys are filter which specifies the
1654
     *                          filter type,
1655
     *                          flags which specifies any flags that apply to the
1656
     *                          filter, and options which specifies any options that
1657
     *                          apply to the filter. See the example below for a better understanding.
1658
     *                          </p>
1659
     *                          <p>
1660
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1661
     *                          input array are filtered by this filter.
1662
     *                          </p>
1663
     * @param bool  $add_empty  [optional] <p>
1664
     *                          Add missing keys as <b>NULL</b> to the return value.
1665
     *                          </p>
1666
     *
1667
     * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1668
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1669
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
1670
     *               is not set and <b>NULL</b> if the filter fails.
1671
     */
1672
    public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
    {
        // With a single argument the native function is called without a definition;
        // otherwise both optional arguments are forwarded.
        $values = \func_num_args() >= 2
            ? \filter_input_array($type, $definition, $add_empty)
            : \filter_input_array($type);

        return self::filter($values);
    }
1682
1683
    /**
1684
     * "filter_var()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1685
     *
1686
     * Filters a variable with a specified filter
1687
     *
1688
     * @see  http://php.net/manual/en/function.filter-var.php
1689
     *
1690
     * @param mixed $variable <p>
1691
     *                        Value to filter.
1692
     *                        </p>
1693
     * @param int   $filter   [optional] <p>
1694
     *                        The ID of the filter to apply. The
1695
     *                        manual page lists the available filters.
1696
     *                        </p>
1697
     * @param mixed $options  [optional] <p>
1698
     *                        Associative array of options or bitwise disjunction of flags. If filter
1699
     *                        accepts options, flags can be provided in "flags" field of array. For
1700
     *                        the "callback" filter, callable type should be passed. The
1701
     *                        callback must accept one argument, the value to be filtered, and return
1702
     *                        the value after filtering/sanitizing it.
1703
     *                        </p>
1704
     *                        <p>
1705
     *                        <code>
1706
     *                        // for filters that accept options, use this format
1707
     *                        $options = array(
1708
     *                        'options' => array(
1709
     *                        'default' => 3, // value to return if the filter fails
1710
     *                        // other options here
1711
     *                        'min_range' => 0
1712
     *                        ),
1713
     *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1714
     *                        );
1715
     *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1716
     *                        // for filter that only accept flags, you can pass them directly
1717
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1718
     *                        // for filter that only accept flags, you can also pass as an array
1719
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1720
     *                        array('flags' => FILTER_NULL_ON_FAILURE));
1721
     *                        // callback validate filter
1722
     *                        function foo($value)
1723
     *                        {
1724
     *                        // Expected format: Surname, GivenNames
1725
     *                        if (strpos($value, ", ") === false) return false;
1726
     *                        list($surname, $givennames) = explode(", ", $value, 2);
1727
     *                        $empty = (empty($surname) || empty($givennames));
1728
     *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1729
     *                        if ($empty || $notstrings) {
1730
     *                        return false;
1731
     *                        } else {
1732
     *                        return $value;
1733
     *                        }
1734
     *                        }
1735
     *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1736
     *                        </code>
1737
     *                        </p>
1738
     *
1739
     * @return mixed the filtered data, or <b>FALSE</b> if the filter fails
1740
     */
1741 2
    public static function filter_var($variable, int $filter = \FILTER_DEFAULT, $options = null)
    {
        // Forward $options only when the caller really passed it.
        $filtered = \func_num_args() >= 3
            ? \filter_var($variable, $filter, $options)
            : \filter_var($variable, $filter);

        return self::filter($filtered);
    }
1751
1752
    /**
1753
     * "filter_var_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1754
     *
1755
     * Gets multiple variables and optionally filters them
1756
     *
1757
     * @see  http://php.net/manual/en/function.filter-var-array.php
1758
     *
1759
     * @param array $data       <p>
1760
     *                          An array with string keys containing the data to filter.
1761
     *                          </p>
1762
     * @param mixed $definition [optional] <p>
1763
     *                          An array defining the arguments. A valid key is a string
1764
     *                          containing a variable name and a valid value is either a
1765
     *                          filter type, or an
1766
     *                          array optionally specifying the filter, flags and options.
1767
     *                          If the value is an array, valid keys are filter
1768
     *                          which specifies the filter type,
1769
     *                          flags which specifies any flags that apply to the
1770
     *                          filter, and options which specifies any options that
1771
     *                          apply to the filter. See the example below for a better understanding.
1772
     *                          </p>
1773
     *                          <p>
1774
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1775
     *                          input array are filtered by this filter.
1776
     *                          </p>
1777
     * @param bool  $add_empty  [optional] <p>
1778
     *                          Add missing keys as <b>NULL</b> to the return value.
1779
     *                          </p>
1780
     *
1781
     * @return mixed an array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1782
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1783
     *               set
1784
     */
1785 2
    public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
    {
        // With a single argument the native function is called without a definition;
        // otherwise both optional arguments are forwarded.
        $values = \func_num_args() >= 2
            ? \filter_var_array($data, $definition, $add_empty)
            : \filter_var_array($data);

        return self::filter($values);
    }
1795
1796
    /**
     * Reports whether the "finfo" class exists in the running PHP.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function finfo_loaded(): bool
    {
        $isAvailable = \class_exists('finfo');

        return $isAvailable;
    }
1806
1807
    /**
     * Returns the leading $n characters of a string.
     *
     * An empty input or a non-positive $n yields an empty string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        if ($n <= 0 || $str === '') {
            return '';
        }

        // "UTF-8" goes straight to mb_substr(); any other encoding is handled by UTF8::substr().
        $head = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $head;
    }
1828
1829
    /**
     * Checks that a string is not longer than the given number of characters.
     *
     * The length is measured with "UTF8::strlen()".
     *
     * @param string $str      the original string to be checked
     * @param int    $box_size the size in number of chars to be checked against string
     *
     * @return bool true if string is less than or equal to $box_size, false otherwise
     */
    public static function fits_inside(string $str, int $box_size): bool
    {
        $charCount = self::strlen($str);

        return $charCount <= $box_size;
    }
1841
1842
    /**
     * Try to fix simple broken UTF-8 strings.
     *
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
     *
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * The fix is a plain str_replace() driven by the "utf8_fix" data table; its keys and values
     * are split once and then kept in function-static caches.
     *
     * @param string $str <p>The input string</p>
     *
     * @return string
     */
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        static $searchCache = null;
        static $replaceCache = null;

        if ($searchCache === null) {
            // Load the mapping table lazily, then split it once for str_replace().
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $searchCache = \array_keys(self::$BROKEN_UTF8_FIX);
            $replaceCache = \array_values(self::$BROKEN_UTF8_FIX);
        }

        return \str_replace($searchCache, $replaceCache, $str);
    }
1875
1876
    /**
     * Fix a double (or multiple) encoded UTF8 string.
     *
     * Arrays are processed element by element (recursively). A string is decoded and
     * re-encoded repeatedly until a pass no longer changes it.
     *
     * @param string|string[] $str you can use a string or an array of strings
     *
     * @return string|string[]
     *                         Will return the fixed input-"array" or
     *                         the fixed input-"string"
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function fix_utf8($str)
    {
        if (\is_array($str) === true) {
            foreach ($str as &$entry) {
                $entry = self::fix_utf8($entry);
            }
            unset($entry);

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $str = (string) $str;

        // Repeat the decode / encode round trip until the string is stable.
        $previous = '';
        while ($previous !== $str) {
            $previous = $str;

            $decoded = self::utf8_decode($str, true);

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::to_utf8($decoded);
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $str;
    }
1918
1919
    /**
     * Get the text direction of a specific character.
     *
     * When "intlChar" support is flagged, \IntlChar::charDirection() is asked first; if its answer
     * is in neither of the two id lists below, the built-in code point tables are consulted.
     *
     * @param string $char
     *
     * @return string 'RTL' or 'LTR'
     */
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlDirection = \IntlChar::charDirection($char);

            // direction ids from "IntlChar"-Class
            if (\in_array($intlDirection, [0, 11, 12, 20], true)) {
                return 'LTR';
            }

            if (\in_array($intlDirection, [1, 13, 14, 15, 21], true)) {
                return 'RTL';
            }
        }

        $codePoint = static::chr_to_decimal($char);

        // Single code points that are right-to-left.
        $rtlCodePoints = [
            0x5be, 0x5c0, 0x5c3, 0x5c6,
            0x608, 0x60b, 0x60d, 0x61b,
            0x710, 0x7b1, 0x7fa,
            0x81a, 0x824, 0x828, 0x85e,
            0x200f,
            0xfb1d, 0xfb3e,
            0x10808, 0x1083c, 0x1093f, 0x10a00,
        ];

        if (\in_array($codePoint, $rtlCodePoints, true)) {
            return 'RTL';
        }

        // Inclusive [first, last] code point ranges that are right-to-left.
        $rtlRanges = [
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            [0x800, 0x815],
            [0x830, 0x83e],
            [0x840, 0x858],
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            [0x10b78, 0x10b7f],
        ];

        foreach ($rtlRanges as list($first, $last)) {
            if ($codePoint >= $first && $codePoint <= $last) {
                return 'RTL';
            }
        }

        // Everything that is not listed above is treated as left-to-right.
        return 'LTR';
    }
2032
2033
    /**
     * Check for php-support.
     *
     * @param string|null $key <p>Name of one support entry, or null for all of them.</p>
     *
     * @return mixed
     *               Return the full support-"array", if $key === null<br>
     *               return bool-value, if $key is used and available<br>
     *               otherwise return <strong>null</strong>
     */
    public static function getSupportInfo(string $key = null)
    {
        if ($key === null) {
            return self::$SUPPORT;
        }

        // The transliterator list is loaded on first use only.
        if (!isset(self::$INTL_TRANSLITERATOR_LIST)) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        // compatibility fix for old versions
        self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

        return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
    }
2057
2058
    /**
     * Guesses the file type of binary content from its first two bytes.
     *
     * Warning: this method only works for some file-types (png bmp gif jpg rar zip midi exe pdf)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * Only the two leading bytes are compared, so the result is a hint and not a validation
     * of the content. Input shorter than two bytes, and any unknown prefix, yields $fallback.
     *
     * @param string $str      <p>The (binary) content to inspect.</p>
     * @param array  $fallback with this keys: 'ext', 'mime', 'type'
     *
     * @return array
     *               with this keys: 'ext', 'mime', 'type'
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        if ($str === '') {
            return $fallback;
        }

        $prefix = \substr($str, 0, 2);
        if ($prefix === false || \strlen($prefix) !== 2) {
            return $fallback;
        }

        // Two-byte prefix => [extension, mime type].
        // The raw bytes are compared directly: joining the two decimal byte values into one
        // number is ambiguous (e.g. 7 . 173 and 71 . 73 both give "7173").
        $signatures = [
            '%P'       => ['pdf', 'application/pdf'],
            'MZ'       => ['exe', 'application/octet-stream'],
            'MT'       => ['midi', 'audio/x-midi'],
            'PK'       => ['zip', 'application/zip'],
            'Ra'       => ['rar', 'application/rar'],
            "\xFF\xD8" => ['jpg', 'image/jpeg'],
            'GI'       => ['gif', 'image/gif'],
            'BM'       => ['bmp', 'image/bmp'],
            "\x89P"    => ['png', 'image/png'],
        ];

        if (!isset($signatures[$prefix])) {
            return $fallback;
        }

        list($ext, $mime) = $signatures[$prefix];

        return [
            'ext'  => $ext,
            'mime' => $mime,
            'type' => 'binary',
        ];
    }
2159
2160
    /**
     * Builds a random string by picking $length characters from $possibleChars.
     *
     * An empty $possibleChars yields an empty string, and so does a $length that is not positive.
     *
     * @param int    $length        <p>Length of the random string.</p>
     * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
    {
        // Random index in [0, $upperBound]; falls back to mt_rand() when random_int() throws an \Exception.
        $randomIndex = static function (int $upperBound): int {
            try {
                return \random_int(0, $upperBound);
            } catch (\Exception $e) {
                /** @noinspection RandomApiMigrationInspection */
                return \mt_rand(0, $upperBound);
            }
        };

        $result = '';
        $picked = 0;

        if ($encoding === 'UTF-8') {
            $alphabetSize = (int) \mb_strlen($possibleChars);
            if ($alphabetSize === 0) {
                return '';
            }

            while ($picked < $length) {
                $char = \mb_substr($possibleChars, $randomIndex($alphabetSize - 1), 1);
                if ($char !== false) {
                    $result .= $char;
                    ++$picked;
                }
            }

            return $result;
        }

        // Any other encoding goes through the class' own strlen() / substr().
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $alphabetSize = (int) self::strlen($possibleChars, $encoding);
        if ($alphabetSize === 0) {
            return '';
        }

        while ($picked < $length) {
            $char = self::substr($possibleChars, $randomIndex($alphabetSize - 1), 1, $encoding);
            if ($char !== false) {
                $result .= $char;
                ++$picked;
            }
        }

        return $result;
    }
2221
2222
    /**
     * Creates a unique string from a random number, the session id, the remote / server
     * address (when present in $_SERVER) and optional extra entropy, passed through uniqid().
     *
     * @param int|string $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     */
    public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
    {
        $seed = \implode(
            '',
            [
                \random_int(0, \mt_getrandmax()),
                \session_id(),
                $_SERVER['REMOTE_ADDR'] ?? '',
                $_SERVER['SERVER_ADDR'] ?? '',
                $entropyExtra,
            ]
        );

        $unique = \uniqid($seed, true);

        // Without hashing the raw uniqid() value (seed included) is returned.
        return $md5 ? \md5($unique . $seed) : $unique;
    }
2244
2245
    /**
2246
     * alias for "UTF8::string_has_bom()"
2247
     *
2248
     * @see        UTF8::string_has_bom()
2249
     *
2250
     * @param string $str
2251
     *
2252
     * @return bool
2253
     *
2254
     * @deprecated <p>use "UTF8::string_has_bom()"</p>
2255
     */
2256 2
    public static function hasBom(string $str): bool
    {
        // Deprecated alias: hand the string over to string_has_bom() unchanged.
        $hasBom = self::string_has_bom($str);

        return $hasBom;
    }
2260
2261
    /**
2262
     * Returns true if the string contains a lower case char, false otherwise.
2263
     *
2264
     * @param string $str <p>The input string.</p>
2265
     *
2266
     * @return bool whether or not the string contains a lower case character
2267
     */
2268 47
    public static function has_lowercase(string $str): bool
    {
        // Any prefix followed by at least one lower case character.
        $pattern = '.*[[:lower:]]';

        // Without mbstring support, use the class' own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2277
2278
    /**
2279
     * Returns true if the string contains an upper case char, false otherwise.
2280
     *
2281
     * @param string $str <p>The input string.</p>
2282
     *
2283
     * @return bool whether or not the string contains an upper case character
2284
     */
2285 12
    public static function has_uppercase(string $str): bool
    {
        // Any prefix followed by at least one upper case character.
        $pattern = '.*[[:upper:]]';

        // Without mbstring support, use the class' own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2294
2295
    /**
2296
     * Converts a hexadecimal-value into an UTF-8 character.
2297
     *
2298
     * @param string $hexdec <p>The hexadecimal value.</p>
2299
     *
2300
     * @return false|string one single UTF-8 character
2301
     */
2302 4
    public static function hex_to_chr(string $hexdec)
    {
        // hexadecimal string -> decimal value -> character
        $decimal = \hexdec($hexdec);

        return self::decimal_to_chr($decimal);
    }
2306
2307
    /**
2308
     * Converts hexadecimal U+xxxx code point representation to integer.
2309
     *
2310
     * INFO: opposite to UTF8::int_to_hex()
2311
     *
2312
     * @param string $hexDec <p>The hexadecimal code point representation.</p>
2313
     *
2314
     * @return false|int the code point, or false on failure
2315
     */
2316 2
    public static function hex_to_int($hexDec)
    {
        // The parameter is untyped, so normalise it to a string first.
        $hexDec = (string) $hexDec;

        if ($hexDec === '') {
            return false;
        }

        // Optional "\u" or "U+" prefix followed by 4 to 6 hexadecimal digits.
        // Only real hex digits (0-9, a-f, A-F) are accepted: with a wider class,
        // input such as "U+zzzz" passes the check and intval() then silently
        // yields 0 (or a truncated value) instead of signalling a failure.
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexDec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2331
2332
    /**
2333
     * alias for "UTF8::html_entity_decode()"
2334
     *
2335
     * @see UTF8::html_entity_decode()
2336
     *
2337
     * @param string $str
2338
     * @param int    $flags
2339
     * @param string $encoding
2340
     *
2341
     * @return string
2342
     */
2343 4
    public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        // Alias: forward every argument unchanged to html_entity_decode().
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2347
2348
    /**
2349
     * Converts a UTF-8 string to a series of HTML numbered entities.
2350
     *
2351
     * INFO: opposite to UTF8::html_decode()
2352
     *
2353
     * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
2354
     * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
2355
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
2356
     *
2357
     * @return string HTML numbered entities
2358
     */
2359 14
    public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // When ASCII is kept, only code points from 0x80 upwards are encoded.
            $startCode = $keepAsciiChars === true ? 0x80 : 0x00;

            // A conversion map consists of groups of four integers
            // (start, end, offset, mask). It must contain a multiple of four
            // elements; PHP 8 throws a ValueError otherwise.
            $convMap = [$startCode, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                return \mb_encode_numericentity($str, $convMap);
            }

            return \mb_encode_numericentity($str, $convMap, $encoding);
        }

        //
        // fallback via vanilla php: encode character by character
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keepAsciiChars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keepAsciiChars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2404
2405
    /**
2406
     * UTF-8 version of html_entity_decode()
2407
     *
2408
     * The reason we are not using html_entity_decode() by itself is because
2409
     * while it is not technically correct to leave out the semicolon
2410
     * at the end of an entity most browsers will still interpret the entity
2411
     * correctly. html_entity_decode() does not convert entities without
2412
     * semicolons, so we are left with our own little solution here. Bummer.
2413
     *
2414
     * Convert all HTML entities to their applicable characters
2415
     *
2416
     * INFO: opposite to UTF8::html_encode()
2417
     *
2418
     * @see http://php.net/manual/en/function.html-entity-decode.php
2419
     *
2420
     * @param string $str      <p>
2421
     *                         The input string.
2422
     *                         </p>
2423
     * @param int    $flags    [optional] <p>
2424
     *                         A bitmask of one or more of the following flags, which specify how to handle quotes
2425
     *                         and which document type to use. If null is passed (the default), ENT_QUOTES | ENT_HTML5 is used.
2426
     *                         <table>
2427
     *                         Available <i>flags</i> constants
2428
     *                         <tr valign="top">
2429
     *                         <td>Constant Name</td>
2430
     *                         <td>Description</td>
2431
     *                         </tr>
2432
     *                         <tr valign="top">
2433
     *                         <td><b>ENT_COMPAT</b></td>
2434
     *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2435
     *                         </tr>
2436
     *                         <tr valign="top">
2437
     *                         <td><b>ENT_QUOTES</b></td>
2438
     *                         <td>Will convert both double and single quotes.</td>
2439
     *                         </tr>
2440
     *                         <tr valign="top">
2441
     *                         <td><b>ENT_NOQUOTES</b></td>
2442
     *                         <td>Will leave both double and single quotes unconverted.</td>
2443
     *                         </tr>
2444
     *                         <tr valign="top">
2445
     *                         <td><b>ENT_HTML401</b></td>
2446
     *                         <td>
2447
     *                         Handle code as HTML 4.01.
2448
     *                         </td>
2449
     *                         </tr>
2450
     *                         <tr valign="top">
2451
     *                         <td><b>ENT_XML1</b></td>
2452
     *                         <td>
2453
     *                         Handle code as XML 1.
2454
     *                         </td>
2455
     *                         </tr>
2456
     *                         <tr valign="top">
2457
     *                         <td><b>ENT_XHTML</b></td>
2458
     *                         <td>
2459
     *                         Handle code as XHTML.
2460
     *                         </td>
2461
     *                         </tr>
2462
     *                         <tr valign="top">
2463
     *                         <td><b>ENT_HTML5</b></td>
2464
     *                         <td>
2465
     *                         Handle code as HTML 5.
2466
     *                         </td>
2467
     *                         </tr>
2468
     *                         </table>
2469
     *                         </p>
2470
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2471
     *
2472
     * @return string the decoded string
2473
     */
2474 43
    public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        // Strings shorter than four bytes or without an ampersand are returned as-is.
        if (
            !isset($str[3]) // examples: &; || &x;
            ||
            \strpos($str, '&') === false // no "&"
        ) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // No explicit flags: decode both quote styles and use the HTML5 entity set.
        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // A conversion map consists of groups of four integers
        // (start, end, offset, mask). It must contain a multiple of four
        // elements; PHP 8 throws a ValueError otherwise.
        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        $convMap = [0x80, 0xfffff, 0, 0xfffff];

        // Repeat until a pass no longer changes the string, so that entities
        // which only appear after a previous decoding step are decoded as well.
        do {
            $str_compare = $str;

            if (self::$SUPPORT['mbstring'] === true) {
                if ($encoding === 'UTF-8') {
                    $str = \mb_decode_numericentity($str, $convMap);
                } else {
                    $str = \mb_decode_numericentity($str, $convMap, $encoding);
                }
            } else {
                $str = (string) \preg_replace_callback(
                    "/&#\d{2,6};/",
                    /**
                     * @param string[] $matches
                     *
                     * @return string
                     */
                    static function (array $matches) use ($encoding): string {
                        $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');
                        // Quotes stay encoded here; the native call below
                        // handles them according to $flags.
                        if ($returnTmp !== '"' && $returnTmp !== "'") {
                            return $returnTmp;
                        }

                        return $matches[0];
                    },
                    $str
                );
            }

            if (\strpos($str, '&') !== false) {
                if (\strpos($str, '&#') !== false) {
                    // decode also numeric & UTF16 two byte entities:
                    // append the missing ";" so the native function recognises them
                    $str = (string) \preg_replace(
                        '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                        '$1;',
                        $str
                    );
                }

                $str = \html_entity_decode(
                    $str,
                    $flags,
                    $encoding
                );
            }
        } while ($str_compare !== $str);

        return $str;
    }
2561
2562
    /**
2563
     * Create an HTML-escaped version of the string via "UTF8::htmlspecialchars()".
2564
     *
2565
     * @param string $str
2566
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2567
     *
2568
     * @return string
2569
     */
2570 6
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        // Escape both quote styles and substitute invalid code unit sequences.
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }
2578
2579
    /**
2580
     * Remove empty html-tag.
2581
     *
2582
     * e.g.: <tag></tag>
2583
     *
2584
     * @param string $str
2585
     *
2586
     * @return string
2587
     */
2588 1
    public static function html_stripe_empty_tags(string $str): string
    {
        // Matches an opening tag, optional whitespace and a closing tag.
        $stripped = \preg_replace(
            "/<[^\/>]*>(([\s]?)*|)<\/[^>]*>/u",
            '',
            $str
        );

        // preg_replace() returns null on failure (e.g. when the subject is not
        // valid UTF-8 while the "u" modifier is used). Return the input
        // unchanged in that case instead of silently turning it into ''.
        return $stripped ?? $str;
    }
2596
2597
    /**
2598
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2599
     *
2600
     * @see http://php.net/manual/en/function.htmlentities.php
2601
     *
2602
     * @param string $str           <p>
2603
     *                              The input string.
2604
     *                              </p>
2605
     * @param int    $flags         [optional] <p>
2606
     *                              A bitmask of one or more of the following flags, which specify how to handle
2607
     *                              quotes, invalid code unit sequences and the used document type. The default is
2608
     *                              ENT_COMPAT | ENT_HTML401.
2609
     *                              <table>
2610
     *                              Available <i>flags</i> constants
2611
     *                              <tr valign="top">
2612
     *                              <td>Constant Name</td>
2613
     *                              <td>Description</td>
2614
     *                              </tr>
2615
     *                              <tr valign="top">
2616
     *                              <td><b>ENT_COMPAT</b></td>
2617
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2618
     *                              </tr>
2619
     *                              <tr valign="top">
2620
     *                              <td><b>ENT_QUOTES</b></td>
2621
     *                              <td>Will convert both double and single quotes.</td>
2622
     *                              </tr>
2623
     *                              <tr valign="top">
2624
     *                              <td><b>ENT_NOQUOTES</b></td>
2625
     *                              <td>Will leave both double and single quotes unconverted.</td>
2626
     *                              </tr>
2627
     *                              <tr valign="top">
2628
     *                              <td><b>ENT_IGNORE</b></td>
2629
     *                              <td>
2630
     *                              Silently discard invalid code unit sequences instead of returning
2631
     *                              an empty string. Using this flag is discouraged as it
2632
     *                              may have security implications.
2633
     *                              </td>
2634
     *                              </tr>
2635
     *                              <tr valign="top">
2636
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2637
     *                              <td>
2638
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2639
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
2640
     *                              string.
2641
     *                              </td>
2642
     *                              </tr>
2643
     *                              <tr valign="top">
2644
     *                              <td><b>ENT_DISALLOWED</b></td>
2645
     *                              <td>
2646
     *                              Replace invalid code points for the given document type with a
2647
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2648
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2649
     *                              instance, to ensure the well-formedness of XML documents with
2650
     *                              embedded external content.
2651
     *                              </td>
2652
     *                              </tr>
2653
     *                              <tr valign="top">
2654
     *                              <td><b>ENT_HTML401</b></td>
2655
     *                              <td>
2656
     *                              Handle code as HTML 4.01.
2657
     *                              </td>
2658
     *                              </tr>
2659
     *                              <tr valign="top">
2660
     *                              <td><b>ENT_XML1</b></td>
2661
     *                              <td>
2662
     *                              Handle code as XML 1.
2663
     *                              </td>
2664
     *                              </tr>
2665
     *                              <tr valign="top">
2666
     *                              <td><b>ENT_XHTML</b></td>
2667
     *                              <td>
2668
     *                              Handle code as XHTML.
2669
     *                              </td>
2670
     *                              </tr>
2671
     *                              <tr valign="top">
2672
     *                              <td><b>ENT_HTML5</b></td>
2673
     *                              <td>
2674
     *                              Handle code as HTML 5.
2675
     *                              </td>
2676
     *                              </tr>
2677
     *                              </table>
2678
     *                              </p>
2679
     * @param string $encoding      [optional] <p>
2680
     *                              Like <b>htmlspecialchars</b>,
2681
     *                              <b>htmlentities</b> takes an optional third argument
2682
     *                              <i>encoding</i> which defines encoding used in
2683
     *                              conversion.
2684
     *                              Although this argument is technically optional, you are highly
2685
     *                              encouraged to specify the correct value for your code.
2686
     *                              </p>
2687
     * @param bool   $double_encode [optional] <p>
2688
     *                              When <i>double_encode</i> is turned off PHP will not
2689
     *                              encode existing html entities. The default is to convert everything.
2690
     *                              </p>
2691
     *
2692
     * @return string
2693
     *                <p>
2694
     *                The encoded string.
2695
     *                <br><br>
2696
     *                If the input <i>string</i> contains an invalid code unit
2697
     *                sequence within the given <i>encoding</i> an empty string
2698
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2699
     *                <b>ENT_SUBSTITUTE</b> flags are set.
2700
     *                </p>
2701
     */
2702 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used as given; everything else is normalized.
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // The native htmlentities() leaves backslashes untouched, so they are
        // replaced by their numeric entity afterwards.
        //
        // https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
        $encoded = \str_replace(
            '\\',
            '&#92;',
            \htmlentities($str, $flags, $encoding, $double_encode)
        );

        // Finally encode the remaining non-ASCII characters as numeric entities.
        return self::html_encode($encoded, true, $encoding);
    }
2726
2727
    /**
2728
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2729
     *
2730
     * INFO: Take a look at "UTF8::htmlentities()"
2731
     *
2732
     * @see http://php.net/manual/en/function.htmlspecialchars.php
2733
     *
2734
     * @param string $str           <p>
2735
     *                              The string being converted.
2736
     *                              </p>
2737
     * @param int    $flags         [optional] <p>
2738
     *                              A bitmask of one or more of the following flags, which specify how to handle
2739
     *                              quotes, invalid code unit sequences and the used document type. The default is
2740
     *                              ENT_COMPAT | ENT_HTML401.
2741
     *                              <table>
2742
     *                              Available <i>flags</i> constants
2743
     *                              <tr valign="top">
2744
     *                              <td>Constant Name</td>
2745
     *                              <td>Description</td>
2746
     *                              </tr>
2747
     *                              <tr valign="top">
2748
     *                              <td><b>ENT_COMPAT</b></td>
2749
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2750
     *                              </tr>
2751
     *                              <tr valign="top">
2752
     *                              <td><b>ENT_QUOTES</b></td>
2753
     *                              <td>Will convert both double and single quotes.</td>
2754
     *                              </tr>
2755
     *                              <tr valign="top">
2756
     *                              <td><b>ENT_NOQUOTES</b></td>
2757
     *                              <td>Will leave both double and single quotes unconverted.</td>
2758
     *                              </tr>
2759
     *                              <tr valign="top">
2760
     *                              <td><b>ENT_IGNORE</b></td>
2761
     *                              <td>
2762
     *                              Silently discard invalid code unit sequences instead of returning
2763
     *                              an empty string. Using this flag is discouraged as it
2764
     *                              may have security implications.
2765
     *                              </td>
2766
     *                              </tr>
2767
     *                              <tr valign="top">
2768
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2769
     *                              <td>
2770
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2771
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
2772
     *                              string.
2773
     *                              </td>
2774
     *                              </tr>
2775
     *                              <tr valign="top">
2776
     *                              <td><b>ENT_DISALLOWED</b></td>
2777
     *                              <td>
2778
     *                              Replace invalid code points for the given document type with a
2779
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2780
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2781
     *                              instance, to ensure the well-formedness of XML documents with
2782
     *                              embedded external content.
2783
     *                              </td>
2784
     *                              </tr>
2785
     *                              <tr valign="top">
2786
     *                              <td><b>ENT_HTML401</b></td>
2787
     *                              <td>
2788
     *                              Handle code as HTML 4.01.
2789
     *                              </td>
2790
     *                              </tr>
2791
     *                              <tr valign="top">
2792
     *                              <td><b>ENT_XML1</b></td>
2793
     *                              <td>
2794
     *                              Handle code as XML 1.
2795
     *                              </td>
2796
     *                              </tr>
2797
     *                              <tr valign="top">
2798
     *                              <td><b>ENT_XHTML</b></td>
2799
     *                              <td>
2800
     *                              Handle code as XHTML.
2801
     *                              </td>
2802
     *                              </tr>
2803
     *                              <tr valign="top">
2804
     *                              <td><b>ENT_HTML5</b></td>
2805
     *                              <td>
2806
     *                              Handle code as HTML 5.
2807
     *                              </td>
2808
     *                              </tr>
2809
     *                              </table>
2810
     *                              </p>
2811
     * @param string $encoding      [optional] <p>
2812
     *                              Defines encoding used in conversion.
2813
     *                              </p>
2814
     *                              <p>
2815
     *                              For the purposes of this function, the encodings
2816
     *                              ISO-8859-1, ISO-8859-15,
2817
     *                              UTF-8, cp866,
2818
     *                              cp1251, cp1252, and
2819
     *                              KOI8-R are effectively equivalent, provided the
2820
     *                              <i>string</i> itself is valid for the encoding, as
2821
     *                              the characters affected by <b>htmlspecialchars</b> occupy
2822
     *                              the same positions in all of these encodings.
2823
     *                              </p>
2824
     * @param bool   $double_encode [optional] <p>
2825
     *                              When <i>double_encode</i> is turned off PHP will not
2826
     *                              encode existing html entities, the default is to convert everything.
2827
     *                              </p>
2828
     *
2829
     * @return string the converted string.
2830
     *                </p>
2831
     *                <p>
2832
     *                If the input <i>string</i> contains an invalid code unit
2833
     *                sequence within the given <i>encoding</i> an empty string
2834
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2835
     *                <b>ENT_SUBSTITUTE</b> flags are set
2836
     */
2837 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used as given; everything else is normalized.
        $needsNormalization = $encoding !== 'UTF-8' && $encoding !== 'CP850';
        if ($needsNormalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $escaped = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $escaped;
    }
2849
2850
    /**
2851
     * Checks whether iconv is available on the server.
2852
     *
2853
     * @return bool
2854
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2855
     */
2856
    public static function iconv_loaded(): bool
    {
        // Ask PHP whether the "iconv" extension is loaded.
        $isLoaded = \extension_loaded('iconv');

        return $isLoaded;
    }
2860
2861
    /**
2862
     * alias for "UTF8::decimal_to_chr()"
2863
     *
2864
     * @see UTF8::decimal_to_chr()
2865
     *
2866
     * @param mixed $int
2867
     *
2868
     * @return string
2869
     */
2870 4
    public static function int_to_chr($int): string
    {
        // Alias: the conversion itself happens in decimal_to_chr().
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
2874
2875
    /**
2876
     * Converts Integer to hexadecimal U+xxxx code point representation.
2877
     *
2878
     * INFO: opposite to UTF8::hex_to_int()
2879
     *
2880
     * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2881
     * @param string $pfix [optional]
2882
     *
2883
     * @return string the prefixed hexadecimal code point, left-padded with zeros to at least four digits
2884
     */
2885 6
    public static function int_to_hex(int $int, string $pfix = 'U+'): string
    {
        // Left-pad the hexadecimal digits with zeros to a minimum width of four;
        // longer values are kept as they are.
        $hexDigits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $pfix . $hexDigits;
    }
2893
2894
    /**
2895
     * Checks whether intl-char is available on the server.
2896
     *
2897
     * @return bool
2898
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2899
     */
2900
    public static function intlChar_loaded(): bool
    {
        // The check is based on the existence of the "IntlChar" class.
        $isAvailable = \class_exists('IntlChar');

        return $isAvailable;
    }
2904
2905
    /**
2906
     * Checks whether intl is available on the server.
2907
     *
2908
     * @return bool
2909
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2910
     */
2911 5
    public static function intl_loaded(): bool
    {
        // Ask PHP whether the "intl" extension is loaded.
        $isLoaded = \extension_loaded('intl');

        return $isLoaded;
    }
2915
2916
    /**
2917
     * alias for "UTF8::is_ascii()"
2918
     *
2919
     * @see        UTF8::is_ascii()
2920
     *
2921
     * @param string $str
2922
     *
2923
     * @return bool
2924
     *
2925
     * @deprecated <p>use "UTF8::is_ascii()"</p>
2926
     */
2927 2
    public static function isAscii(string $str): bool
    {
        // Deprecated alias: delegate to is_ascii().
        $isAscii = self::is_ascii($str);

        return $isAscii;
    }
2931
2932
    /**
2933
     * alias for "UTF8::is_base64()"
2934
     *
2935
     * @see        UTF8::is_base64()
2936
     *
2937
     * @param string $str
2938
     *
2939
     * @return bool
2940
     *
2941
     * @deprecated <p>use "UTF8::is_base64()"</p>
2942
     */
2943 2
    public static function isBase64($str): bool
    {
        // Deprecated alias: delegate to is_base64().
        $isBase64 = self::is_base64($str);

        return $isBase64;
    }
2947
2948
    /**
2949
     * alias for "UTF8::is_binary()"
2950
     *
2951
     * @see        UTF8::is_binary()
2952
     *
2953
     * @param mixed $str
2954
     * @param bool  $strict
2955
     *
2956
     * @return bool
2957
     *
2958
     * @deprecated <p>use "UTF8::is_binary()"</p>
2959
     */
2960 4
    public static function isBinary($str, $strict = false): bool
    {
        // Deprecated alias: delegate to is_binary(), passing both arguments through.
        $isBinary = self::is_binary($str, $strict);

        return $isBinary;
    }
2964
2965
    /**
2966
     * alias for "UTF8::is_bom()"
2967
     *
2968
     * @see        UTF8::is_bom()
2969
     *
2970
     * @param string $utf8_chr
2971
     *
2972
     * @return bool
2973
     *
2974
     * @deprecated <p>use "UTF8::is_bom()"</p>
2975
     */
2976 2
    public static function isBom(string $utf8_chr): bool
    {
        // Deprecated camelCase alias: forwards the argument unchanged to is_bom().
        $isBom = self::is_bom($utf8_chr);

        return $isBom;
    }
2980
2981
    /**
2982
     * alias for "UTF8::is_html()"
2983
     *
2984
     * @see        UTF8::is_html()
2985
     *
2986
     * @param string $str
2987
     *
2988
     * @return bool
2989
     *
2990
     * @deprecated <p>use "UTF8::is_html()"</p>
2991
     */
2992 2
    public static function isHtml(string $str): bool
    {
        // Deprecated camelCase alias: forwards the argument unchanged to is_html().
        $containsHtml = self::is_html($str);

        return $containsHtml;
    }
2996
2997
    /**
2998
     * alias for "UTF8::is_json()"
2999
     *
3000
     * @see        UTF8::is_json()
3001
     *
3002
     * @param string $str
3003
     *
3004
     * @return bool
3005
     *
3006
     * @deprecated <p>use "UTF8::is_json()"</p>
3007
     */
3008
    public static function isJson(string $str): bool
    {
        // Deprecated camelCase alias: forwards to is_json() using that method's
        // default for its second parameter.
        $isJson = self::is_json($str);

        return $isJson;
    }
3012
3013
    /**
     * Deprecated alias that forwards to "UTF8::is_utf16()".
     *
     * @see        UTF8::is_utf16()
     *
     * @param mixed $str <p>The input to check.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     *
     * @deprecated <p>use "UTF8::is_utf16()"</p>
     */
    public static function isUtf16($str)
    {
        // Only the first argument is forwarded, so is_utf16() applies its default binary pre-check.
        $detected = self::is_utf16($str);

        return $detected;
    }
3031
3032
    /**
     * Deprecated alias that forwards to "UTF8::is_utf32()".
     *
     * @see        UTF8::is_utf32()
     *
     * @param mixed $str <p>The input to check.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     *
     * @deprecated <p>use "UTF8::is_utf32()"</p>
     */
    public static function isUtf32($str)
    {
        // Only the first argument is forwarded, so is_utf32() applies its default binary pre-check.
        return self::is_utf32($str);
    }
3050
3051
    /**
     * Deprecated alias that forwards to "UTF8::is_utf8()".
     *
     * @see        UTF8::is_utf8()
     *
     * @param string|string[] $str    <p>The string (or array of strings) to be checked.</p>
     * @param bool            $strict <p>Passed through unchanged to "UTF8::is_utf8()".</p>
     *
     * @return bool
     *
     * @deprecated <p>use "UTF8::is_utf8()"</p>
     */
    public static function isUtf8($str, $strict = false): bool
    {
        $isUtf8 = self::is_utf8($str, $strict);

        return $isUtf8;
    }
3067
3068
    /**
3069
     * Returns true if the string contains only alphabetic chars, false otherwise.
3070
     *
3071
     * @param string $str
3072
     *
3073
     * @return bool
3074
     *              Whether or not $str contains only alphabetic chars
3075
     */
3076 10
    public static function is_alpha(string $str): bool
    {
        // POSIX "alpha" class, anchored at both ends of the string.
        $pattern = '^[[:alpha:]]*$';

        // Without mbstring, defer to the class's own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3085
3086
    /**
3087
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
3088
     *
3089
     * @param string $str
3090
     *
3091
     * @return bool
3092
     *              Whether or not $str contains only alphanumeric chars
3093
     */
3094 13
    public static function is_alphanumeric(string $str): bool
    {
        // POSIX "alnum" class, anchored at both ends of the string.
        $pattern = '^[[:alnum:]]*$';

        // Without mbstring, defer to the class's own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3103
3104
    /**
3105
     * Checks if a string is 7 bit ASCII.
3106
     *
3107
     * @param string $str <p>The string to check.</p>
3108
     *
3109
     * @return bool
3110
     *              <strong>true</strong> if it is ASCII<br>
3111
     *              <strong>false</strong> otherwise
3112
     */
3113 137
    public static function is_ascii(string $str): bool
    {
        // Nothing to inspect in an empty string.
        if ($str === '') {
            return true;
        }

        // Accepted bytes: 0x09, 0x0A, 0x0D, 0x10, 0x13 and the printable range 0x20-0x7E.
        // NOTE(review): 0x10 and 0x13 look like decimal 10/13 (LF/CR) written as hex;
        // they are kept as-is here — confirm whether they are intentional.
        $foundOtherByte = \preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

        return !$foundOtherByte;
    }
3121
3122
    /**
3123
     * Returns true if the string is base64 encoded, false otherwise.
3124
     *
3125
     * @param mixed|string $str                <p>The input string.</p>
3126
     * @param bool         $emptyStringIsValid [optional] <p>Is an empty string valid base64 or not?</p>
3127
     *
3128
     * @return bool whether or not $str is base64 encoded
3129
     */
3130 16
    public static function is_base64($str, $emptyStringIsValid = false): bool
    {
        // Anything that is not a string can never be base64.
        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (\is_string($str) === false) {
            return false;
        }

        // The empty string is only accepted when the caller opted in.
        if ($str === '' && $emptyStringIsValid === false) {
            return false;
        }

        // Strict decoding rejects characters outside the base64 alphabet.
        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // Re-encoding must reproduce the input exactly (canonical form only).
        return \base64_encode($decoded) === $str;
    }
3147
3148
    /**
3149
     * Check if the input is binary... (this looks like a hack).
3150
     *
3151
     * @param mixed $input
3152
     * @param bool  $strict
3153
     *
3154
     * @return bool
3155
     */
3156 39
    public static function is_binary($input, bool $strict = false): bool
    {
        $data = (string) $input;
        if ($data === '') {
            return false;
        }

        // A string made up solely of "0" and "1" characters is treated as binary.
        if (\preg_match('~^[01]+$~', $data)) {
            return true;
        }

        // File-type detection on the raw data may already classify it as binary.
        $fileType = self::get_file_type($data);
        if ($fileType['type'] === 'binary') {
            return true;
        }

        // Heuristic: more than a quarter of all bytes are NUL bytes.
        $byteCount = \strlen($data);
        $nulCount = \substr_count($data, "\x0", 0, $byteCount);
        if (($nulCount / $byteCount) > 0.25) {
            return true;
        }

        // The fileinfo-based check below only runs in strict mode.
        if ($strict !== true) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $mimeEncoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($data);

        return $mimeEncoding === 'binary';
    }
3192
3193
    /**
3194
     * Check if the file is binary.
3195
     *
3196
     * @param string $file
3197
     *
3198
     * @return bool
3199
     */
3200 6
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');

        // No usable handle means no data to inspect.
        if (\is_resource($handle) === false) {
            return false;
        }

        // Only the first 512 bytes are sampled.
        $head = \fread($handle, 512);
        \fclose($handle);

        if ($head === '') {
            return false;
        }

        return self::is_binary($head, true);
    }
3217
3218
    /**
3219
     * Returns true if the string contains only whitespace chars, false otherwise.
3220
     *
3221
     * @param string $str
3222
     *
3223
     * @return bool
3224
     *              Whether or not $str contains only whitespace characters
3225
     */
3226 15
    public static function is_blank(string $str): bool
    {
        // POSIX "space" class, anchored at both ends of the string.
        $pattern = '^[[:space:]]*$';

        // Without mbstring, defer to the class's own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3235
3236
    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise
     */
    public static function is_bom($str): bool
    {
        // Only the keys of self::$BOM are compared. Iterate by value: a by-reference
        // loop over the shared static array is unnecessary for a read-only lookup.
        foreach (self::$BOM as $bomString => $unusedValue) {
            if ($str === $bomString) {
                return true;
            }
        }

        return false;
    }
3257
3258
    /**
3259
     * Determine whether the string is considered to be empty.
3260
     *
3261
     * A variable is considered empty if it does not exist or if its value equals FALSE.
3262
     * empty() does not generate a warning if the variable does not exist.
3263
     *
3264
     * @param mixed $str
3265
     *
3266
     * @return bool whether or not $str is empty()
3267
     */
3268
    public static function is_empty($str): bool
    {
        // The parameter always exists, so empty() reduces to a falsiness check.
        return !$str;
    }
3272
3273
    /**
3274
     * Returns true if the string contains only hexadecimal chars, false otherwise.
3275
     *
3276
     * @param string $str
3277
     *
3278
     * @return bool
3279
     *              Whether or not $str contains only hexadecimal chars
3280
     */
3281 13
    public static function is_hexadecimal(string $str): bool
    {
        // POSIX "xdigit" class, anchored at both ends of the string.
        $pattern = '^[[:xdigit:]]*$';

        // Without mbstring, defer to the class's own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3290
3291
    /**
3292
     * Check if the string contains any html-tags <lall>.
3293
     *
3294
     * @param string $str <p>The input string.</p>
3295
     *
3296
     * @return bool
3297
     */
3298 3
    public static function is_html(string $str): bool
    {
        // An empty string cannot contain a tag.
        if ($str === '') {
            return false;
        }

        // Matches a single opening, closing or self-closing tag, with optional
        // attributes (bare, single-quoted or double-quoted values).
        $tagPattern = "/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/u";

        // One match is enough; no match and a PCRE failure both yield false.
        return \preg_match($tagPattern, $str) === 1;
    }
3311
3312
    /**
3313
     * Try to check if "$str" is a JSON string.
3314
     *
3315
     * @param string $str                              <p>The input string.</p>
3316
     * @param bool   $onlyArrayOrObjectResultsAreValid [optional] <p>Only array and objects are valid json results.</p>
3317
     *
3318
     * @return bool
3319
     */
3320 42
    public static function is_json(string $str, $onlyArrayOrObjectResultsAreValid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // A null result is only tolerated when the input itself spells "null" (any letter case).
        $inputSpellsNull = \strtoupper($str) === 'NULL';
        if ($decoded === null && !$inputSpellsNull) {
            return false;
        }

        // Optionally reject scalar results: only arrays and objects pass.
        $isArrayOrObject = \is_object($decoded) || \is_array($decoded);
        if ($onlyArrayOrObjectResultsAreValid === true && !$isArrayOrObject) {
            return false;
        }

        // The final verdict is whether the decoder reported an error.
        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3348
3349
    /**
     * Returns true if the string contains only lower case chars, false otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only lower case characters
     */
    public static function is_lowercase(string $str): bool
    {
        // POSIX "lower" class, anchored at both ends of the string.
        $pattern = '^[[:lower:]]*$';

        // Without mbstring, defer to the class's own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3363
3364
    /**
3365
     * Returns true if the string is serialized, false otherwise.
3366
     *
3367
     * @param string $str
3368
     *
3369
     * @return bool whether or not $str is serialized
3370
     */
3371 7
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // 'b:0;' is the serialized form of boolean false; unserialize() returns
        // false for it, which is indistinguishable from a failure, so it is
        // recognised up front.
        if ($str === 'b:0;') {
            return true;
        }

        // Only the success of decoding matters here, so never instantiate classes
        // named in the payload ('allowed_classes' => false): this avoids running
        // object hooks on untrusted input while the result stays "!== false".
        // The "@" keeps the notice for malformed input quiet, as before.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3383
3384
    /**
     * Returns true if the string contains only upper case chars, false
     * otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only upper case characters
     */
    public static function is_uppercase(string $str): bool
    {
        // POSIX "upper" class, anchored at both ends of the string.
        $pattern = '^[[:upper:]]*$';

        // Without mbstring, defer to the class's own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3402
3403
    /**
     * Check if the string is UTF-16.
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary [optional] <p>When <b>TRUE</b>, input that
     *                                     "UTF8::is_binary()" (strict mode) does not report
     *                                     as binary is rejected immediately.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // Score the little-endian reading: decode to UTF-8, re-encode, decode again.
        // The candidate is only scored when the second decode reproduces the first.
        $maybeUTF16LE = 0;
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-16LE');
        if ($test) {
            $test2 = \mb_convert_encoding($test, 'UTF-16LE', 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-16LE');
            if ($test3 === $test) {
                if (\count($strChars) === 0) {
                    $strChars = self::count_chars($str, true, false);
                }
                // Iterate by value: a function result cannot be a reference target,
                // and only the keys are used.
                foreach (self::count_chars($test3) as $test3char => $unusedValue) {
                    if (\in_array($test3char, $strChars, true) === true) {
                        ++$maybeUTF16LE;
                    }
                }
            }
        }

        // Same scoring for the big-endian reading.
        $maybeUTF16BE = 0;
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-16BE');
        if ($test) {
            $test2 = \mb_convert_encoding($test, 'UTF-16BE', 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-16BE');
            if ($test3 === $test) {
                if (\count($strChars) === 0) {
                    $strChars = self::count_chars($str, true, false);
                }
                foreach (self::count_chars($test3) as $test3char => $unusedValue) {
                    if (\in_array($test3char, $strChars, true) === true) {
                        ++$maybeUTF16BE;
                    }
                }
            }
        }

        // Equal scores (including 0/0) are inconclusive; otherwise the higher score wins.
        if ($maybeUTF16BE !== $maybeUTF16LE) {
            if ($maybeUTF16LE > $maybeUTF16BE) {
                return 1;
            }

            return 2;
        }

        return false;
    }
3480
3481
    /**
     * Check if the string is UTF-32.
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary [optional] <p>When <b>TRUE</b>, input that
     *                                     "UTF8::is_binary()" (strict mode) does not report
     *                                     as binary is rejected immediately.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // Score the little-endian reading: decode to UTF-8, re-encode, decode again.
        // The candidate is only scored when the second decode reproduces the first.
        $maybeUTF32LE = 0;
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-32LE');
        if ($test) {
            $test2 = \mb_convert_encoding($test, 'UTF-32LE', 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-32LE');
            if ($test3 === $test) {
                if (\count($strChars) === 0) {
                    $strChars = self::count_chars($str, true, false);
                }
                // Iterate by value: a function result cannot be a reference target,
                // and only the keys are used.
                foreach (self::count_chars($test3) as $test3char => $unusedValue) {
                    if (\in_array($test3char, $strChars, true) === true) {
                        ++$maybeUTF32LE;
                    }
                }
            }
        }

        // Same scoring for the big-endian reading.
        $maybeUTF32BE = 0;
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-32BE');
        if ($test) {
            $test2 = \mb_convert_encoding($test, 'UTF-32BE', 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-32BE');
            if ($test3 === $test) {
                if (\count($strChars) === 0) {
                    $strChars = self::count_chars($str, true, false);
                }
                foreach (self::count_chars($test3) as $test3char => $unusedValue) {
                    if (\in_array($test3char, $strChars, true) === true) {
                        ++$maybeUTF32BE;
                    }
                }
            }
        }

        // Equal scores (including 0/0) are inconclusive; otherwise the higher score wins.
        if ($maybeUTF32BE !== $maybeUTF32LE) {
            if ($maybeUTF32LE > $maybeUTF32BE) {
                return 1;
            }

            return 2;
        }

        return false;
    }
3558
3559
    /**
     * Encode a string with emoji chars into a non-emoji string.
     *
     * @param string $str                        <p>The input string</p>
     * @param bool   $useReversibleStringMapping [optional] <p>
     *                                           When <b>TRUE</b>, we use a reversible string mapping
     *                                           between "emoji_encode" and "emoji_decode".</p>
     *
     * @return string
     */
    public static function emoji_encode(string $str, bool $useReversibleStringMapping = false): string
    {
        self::initEmojiData();

        // Pick the replacement table; the search table is the same in both modes.
        $replacements = $useReversibleStringMapping === true
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            (array) $replacements,
            $str
        );
    }
3587
3588
    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * @param string $str                        <p>The input string.</p>
     * @param bool   $useReversibleStringMapping [optional] <p>
     *                                           When <b>TRUE</b>, we use a reversible string mapping
     *                                           between "emoji_encode" and "emoji_decode".</p>
     *
     * @return string
     */
    public static function emoji_decode(string $str, bool $useReversibleStringMapping = false): string
    {
        self::initEmojiData();

        // Pick the search table; the replacement table is the same in both modes.
        $needles = $useReversibleStringMapping === true
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) $needles,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }
3616
3617
    /**
     * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
     *
     * @see    http://hsivonen.iki.fi/php-utf8/
     *
     * @param string|string[] $str    <p>The string to be checked; for an array, every element must be valid.</p>
     * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str) === true) {
            // Read-only check of each element: iterate by value, no reference needed.
            foreach ($str as $v) {
                if (self::is_utf8($v, $strict) === false) {
                    return false;
                }
            }

            return true;
        }

        // Work on a real string from here on, so the byte loop below indexes the
        // same value whose length is measured.
        $str = (string) $str;

        if ($str === '') {
            return true;
        }

        if ($strict === true) {
            $isBinary = self::is_binary($str, true);

            if ($isBinary && self::is_utf16($str, false) !== false) {
                return false;
            }

            if ($isBinary && self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        // NOTE(review): this shortcut relies on the /u modifier, yet it is taken when
        // pcre_utf8_support() does NOT return true — confirm the condition is intended.
        if (self::pcre_utf8_support() !== true) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^.{1}/us', $str) === 1;
        }

        $mState = 0; // cached expected number of octets after the current octet
        // until the beginning of the next UTF8 character sequence
        $mUcs4 = 0; // cached Unicode character
        $mBytes = 1; // cached expected number of octets in the current sequence

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $len = \strlen($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $len; ++$i) {
            $in = self::$ORD[$str[$i]];
            if ($mState === 0) {
                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if ((0x80 & $in) === 0) {
                    // US-ASCII, pass straight through.
                    $mBytes = 1;
                } elseif ((0xE0 & $in) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $mUcs4 = ($in & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;
                } elseif ((0xF0 & $in) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $mUcs4 = ($in & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;
                } elseif ((0xF8 & $in) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $mUcs4 = ($in & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;
                } elseif ((0xFC & $in) === 0xF8) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = ($in & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;
                } elseif ((0xFE & $in) === 0xFC) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = ($in & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;
                } else {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }
            } elseif ((0xC0 & $in) === 0x80) {
                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence

                // Legal continuation.
                $shift = ($mState - 1) * 6;
                $mUcs4 |= ($in & 0x0000003F) << $shift;
                // Prefix: End of the multi-octet sequence. mUcs4 now contains the final
                // Unicode code point to be output.
                if (--$mState === 0) {
                    // Check for illegal sequences and code points.
                    //
                    // From Unicode 3.1, non-shortest form is illegal
                    if (
                        ($mBytes === 2 && $mUcs4 < 0x0080)
                        ||
                        ($mBytes === 3 && $mUcs4 < 0x0800)
                        ||
                        ($mBytes === 4 && $mUcs4 < 0x10000)
                        ||
                        ($mBytes > 4)
                        ||
                        // From Unicode 3.2, surrogate characters are illegal.
                        (($mUcs4 & 0xFFFFF800) === 0xD800)
                        ||
                        // Code points outside the Unicode range are illegal.
                        ($mUcs4 > 0x10FFFF)
                    ) {
                        return false;
                    }
                    // initialize UTF8 cache
                    $mState = 0;
                    $mUcs4 = 0;
                    $mBytes = 1;
                }
            } else {
                // ((0xC0 & (*in) != 0x80) && (mState != 0))
                // Incomplete multi-octet sequence.
                return false;
            }
        }

        return true;
    }
3772
3773
    /**
     * Decodes a JSON string into the corresponding PHP value.
     *
     * The input is passed through self::filter() first; the filtered value and all
     * remaining arguments are then forwarded unchanged to the native \json_decode().
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>The <i>json</i> string being decoded.</p>
     * @param bool   $assoc   [optional] <p>When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.</p>
     * @param int    $depth   [optional] <p>User specified recursion depth.</p>
     * @param int    $options [optional] <p>Bitmask of JSON decode options, e.g. <b>JSON_BIGINT_AS_STRING</b>.</p>
     *
     * @throws \RuntimeException if the JSON support flag (self::$SUPPORT['json']) is false
     *
     * @return mixed
     *               The decoded value in the appropriate PHP type, exactly as returned by \json_decode()
     *               (<b>NULL</b> if the <i>json</i> cannot be decoded or is nested deeper than the limit).
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        $filteredJson = self::filter($json);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $decoded = \json_decode($filteredJson, $assoc, $depth, $options);

        return $decoded;
    }
3823
3824
    /**
     * Returns the JSON representation of a value.
     *
     * The value is passed through self::filter() first; the filtered value and the
     * remaining arguments are then forwarded unchanged to the native \json_encode().
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>The <i>value</i> being encoded. Can be any type except a resource.</p>
     * @param int   $options [optional] <p>Bitmask of JSON encode constants such as <b>JSON_HEX_TAG</b>,
     *                       <b>JSON_PRETTY_PRINT</b>, <b>JSON_UNESCAPED_SLASHES</b> or
     *                       <b>JSON_UNESCAPED_UNICODE</b>.</p>
     * @param int   $depth   [optional] <p>Set the maximum depth. Must be greater than zero.</p>
     *
     * @throws \RuntimeException if the JSON support flag (self::$SUPPORT['json']) is false
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filteredValue = self::filter($value);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $encoded = \json_encode($filteredValue, $options, $depth);

        return $encoded;
    }
3873
3874
    /**
     * Checks whether JSON support is available on the server.
     *
     * Availability is detected by testing for the native "json_decode" function.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function json_loaded(): bool
    {
        $decoderAvailable = \function_exists('json_decode');

        return $decoderAvailable;
    }
3884
3885
    /**
     * Makes string's first char lowercase.
     *
     * For the default 'UTF-8' encoding without language / length options the native
     * mb_* functions are used directly; every other combination is delegated to
     * self::substr() / self::strtolower().
     *
     * @param string      $str                   <p>The input string</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // Non-default encoding: normalize the name and use the encoding-aware helpers.
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $tail = (string) self::substr($str, 1, null, $encoding);
            $head = (string) self::substr($str, 0, 1, $encoding);

            return self::strtolower($head, $encoding, false, $lang, $tryToKeepStringLength) . $tail;
        }

        $tail = (string) \mb_substr($str, 1);
        $head = (string) \mb_substr($str, 0, 1);

        // Fast path: no language specific rules and no length preservation requested.
        if ($lang === null && $tryToKeepStringLength === false) {
            return \mb_strtolower($head) . $tail;
        }

        return self::strtolower($head, $encoding, false, $lang, $tryToKeepStringLength) . $tail;
    }
3941
3942
    /**
     * Alias for "UTF8::lcfirst()": all arguments are forwarded unchanged.
     *
     * @see UTF8::lcfirst()
     *
     * @param string      $str                   <p>The input string</p>
     * @param string      $encoding              [optional] <p>Set the charset</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length</p>
     *
     * @return string the value returned by UTF8::lcfirst()
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $lowerCased = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $lowerCased;
    }
3964
3965
    /**
     * Lowercase the first character of all words in the string.
     *
     * The string is split via self::str_to_words(), every non-excluded part gets its
     * first character lowercased via self::lcfirst() and the parts are joined again.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string[]    $exceptions            [optional] <p>Words that must be left untouched (strict comparison).</p>
     * @param string      $charlist              [optional] <p>Additional chars that belong to words and do not
     *                                           start a new word.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // Compare against '' explicitly: a truthiness test would wrongly turn the
        // input "0" into an empty string.
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        // Write back by index instead of iterating by reference, so that no dangling
        // reference to the last element is left behind.
        foreach ($words as $index => $word) {
            // Falsy parts (empty strings, "0") have nothing to lowercase.
            if (!$word) {
                continue;
            }

            if ($useExceptions === true && \in_array($word, $exceptions, true)) {
                continue;
            }

            $words[$index] = self::lcfirst($word, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
        }

        return \implode('', $words);
    }
4011
4012
    /**
     * Alias for "UTF8::lcfirst()": all arguments are forwarded unchanged.
     *
     * @see UTF8::lcfirst()
     *
     * @param string      $str                   <p>The input string</p>
     * @param string      $encoding              [optional] <p>Set the charset</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length</p>
     *
     * @return string the value returned by UTF8::lcfirst()
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $lowerCased = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $lowerCased;
    }
4034
4035
    /**
     * Strip whitespace or other characters from beginning of a UTF-8 string.
     *
     * When a non-empty $chars list is given, exactly those characters are stripped
     * (including a list such as "0"); otherwise whitespace ("\s") is stripped.
     * The replacement is done with \mb_ereg_replace() when the mbstring support flag
     * is set, and with self::regex_replace() otherwise.
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped</p>
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Test for null / '' explicitly: a plain truthiness check would treat the
        // character list "0" as "not given" and strip whitespace instead.
        if ($chars !== null && $chars !== '') {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+";
        } else {
            $pattern = '^[\s]+';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
4063
4064
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * An array argument is concatenated into one string before its code points are
     * collected via self::codepoints().
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @return string|null the character with the highest code point, or null when no code points were found
     */
    public static function max($arg)
    {
        $subject = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($subject, false);

        return \count($codepoints) === 0
            ? null
            : self::chr(\max($codepoints));
    }
4086
4087
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * The per-character sizes come from self::chr_size_list(); an empty list yields 0.
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @return int max byte lengths of the given chars
     */
    public static function max_chr_width(string $str): int
    {
        $sizes = self::chr_size_list($str);

        return \count($sizes) > 0 ? (int) \max($sizes) : 0;
    }
4104
4105
    /**
     * Checks whether the "mbstring" extension is loaded on the server.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function mbstring_loaded(): bool
    {
        $isLoaded = \extension_loaded('mbstring');

        return $isLoaded;
    }
4115
4116
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * An array argument is concatenated into one string before its code points are
     * collected via self::codepoints().
     *
     * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @return string|null the character with the lowest code point, or null when no code points were found
     */
    public static function min($arg)
    {
        $subject = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($subject, false);

        return \count($codepoints) === 0
            ? null
            : self::chr(\min($codepoints));
    }
4138
4139
    /**
     * Alias for "UTF8::normalize_encoding()": both arguments are forwarded unchanged.
     *
     * @see        UTF8::normalize_encoding()
     *
     * @param mixed $encoding <p>The encoding name to normalize.</p>
     * @param mixed $fallback <p>Value handed to normalize_encoding() as its fallback.</p>
     *
     * @return mixed the value returned by UTF8::normalize_encoding()
     *
     * @deprecated <p>use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4155
4156
    /**
     * Normalize the encoding-"name" input.
     *
     * Resolution order: empty input -> fallback; a few hard-coded spellings; the
     * per-request result cache; the list of known encodings (self::$ENCODINGS,
     * loaded lazily); finally an alias table that is matched against the
     * upper-cased name with all non-alphanumeric characters removed. Names that
     * match nothing are returned upper-cased.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        $encoding = (string) $encoding;

        // Empty names (this also covers the string "0") cannot be normalized.
        if (!$encoding) {
            return $fallback;
        }

        // Hard-coded spellings that need no table lookup.
        if (\in_array($encoding, ['UTF-8', 'UTF8'], true)) {
            return 'UTF-8';
        }

        if (\in_array($encoding, ['8BIT', 'BINARY'], true)) {
            return 'CP850';
        }

        if (\in_array($encoding, ['HTML', 'HTML-ENTITIES'], true)) {
            return 'HTML-ENTITIES';
        }

        // Only a fallback, for non "strict_types" usage ("0" was already handled by
        // the empty check above).
        if ($encoding === '1') {
            return $fallback;
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // Already a known encoding name: remember and return it as is.
        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        $normalized = \strtoupper($encoding);
        $aliasKey = (string) \preg_replace('/[^a-zA-Z0-9\s]/u', '', $normalized);

        $aliases = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // Every alias target is a non-empty string, so isset() is sufficient here.
        if (isset($aliases[$aliasKey])) {
            $normalized = $aliases[$aliasKey];
        }

        // The cache is keyed by the name exactly as it was passed in.
        $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $normalized;

        return $normalized;
    }
4299
4300
    /**
     * Standardize line endings to unix-like ("\n").
     *
     * Windows ("\r\n") sequences are replaced first, then any remaining lone "\r".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the string with "\n" as its only line ending
     */
    public static function normalize_line_ending(string $str): string
    {
        // Order matters: "\r\n" must be handled before the single "\r".
        $foreignLineEndings = ["\r\n", "\r"];
        $unixLineEnding = "\n";

        return \str_replace($foreignLineEndings, $unixLineEnding, $str);
    }
4311
4312
    /**
     * Normalize some MS Word special characters.
     *
     * Typographic quotes, angle quotes, dashes and the ellipsis are replaced by
     * their plain ASCII counterparts (", ', - and ...).
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @return string
     */
    public static function normalize_msword(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // UTF-8 byte sequence => ASCII replacement
        $replacements = [
            "\xc2\xab"     => '"', // « (U+00AB)
            "\xc2\xbb"     => '"', // » (U+00BB)
            "\xe2\x80\x98" => "'", // ‘ (U+2018)
            "\xe2\x80\x99" => "'", // ’ (U+2019)
            "\xe2\x80\x9a" => "'", // ‚ (U+201A)
            "\xe2\x80\x9b" => "'", // ‛ (U+201B)
            "\xe2\x80\x9c" => '"', // “ (U+201C)
            "\xe2\x80\x9d" => '"', // ” (U+201D)
            "\xe2\x80\x9e" => '"', // „ (U+201E)
            "\xe2\x80\x9f" => '"', // ‟ (U+201F)
            "\xe2\x80\xb9" => "'", // ‹ (U+2039)
            "\xe2\x80\xba" => "'", // › (U+203A)
            "\xe2\x80\x93" => '-', // – (U+2013)
            "\xe2\x80\x94" => '-', // — (U+2014)
            "\xe2\x80\xa6" => '...', // … (U+2026)
        ];

        // Apply the replacements one after another, in table order.
        foreach ($replacements as $special => $plain) {
            $str = \str_replace($special, $plain, $str);
        }

        return $str;
    }
4363
4364
    /**
     * Normalize the whitespace.
     *
     * Every entry of self::$WHITESPACE_TABLE found in the string is replaced by a
     * plain space; unless requested otherwise, the entries of
     * self::$BIDI_UNI_CODE_CONTROLS_TABLE are removed beforehand. The prepared
     * search lists are cached in static variables.
     *
     * @param string $str                     <p>The string to be normalized.</p>
     * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                        bidirectional text chars.</p>
     *
     * @return string
     */
    public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
    {
        static $WHITESPACE_CACHE = [];
        static $BIDI_UNICODE_CONTROLS_CACHE = null;

        if ($str === '') {
            return '';
        }

        // One cached search list per value of $keepNonBreakingSpace (0 or 1).
        $cacheKey = (int) $keepNonBreakingSpace;

        if (!isset($WHITESPACE_CACHE[$cacheKey])) {
            $whitespaceTable = self::$WHITESPACE_TABLE;

            if ($keepNonBreakingSpace === true) {
                unset($whitespaceTable['NO-BREAK SPACE']);
            }

            $WHITESPACE_CACHE[$cacheKey] = \array_values($whitespaceTable);
        }

        if ($keepBidiUnicodeControls === false) {
            if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
                $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
            }

            $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
        }

        return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
    }
4405
4406
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * Lookup order: static result cache, the self::$ORD table (loaded lazily),
     * \IntlChar::ord() when the intlChar support flag is set, and finally a manual
     * decoding of the first (up to four) bytes.
     *
     * @param string $chr      <p>The character of which to calculate code point.<p/>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             Unicode code point of the given character,<br>
     *             0 on invalid UTF-8 byte sequence
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        static $CHAR_CACHE = [];

        $chr = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // The cache key is built from the input as given, before any re-encoding.
        $cacheKey = $chr . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            return $CHAR_CACHE[$cacheKey] = self::$ORD[$chr];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlCode = \IntlChar::ord($chr);
            if ($intlCode) {
                return $CHAR_CACHE[$cacheKey] = $intlCode;
            }
        }

        //
        // fallback via vanilla php
        //

        // 1-based list of the first (max. four) byte values of the character
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        $lead = $bytes ? $bytes[1] : 0;

        if ($lead >= 0xF0 && isset($bytes[4])) {
            // four byte sequence
            $codePoint = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
        } elseif ($lead >= 0xE0 && isset($bytes[3])) {
            // three byte sequence
            $codePoint = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
        } elseif ($lead >= 0xC0 && isset($bytes[2])) {
            // two byte sequence
            $codePoint = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
        } else {
            // single byte, or nothing to decode at all (0)
            $codePoint = $lead;
        }

        return $CHAR_CACHE[$cacheKey] = (int) $codePoint;
    }
4484
4485
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike the native "parse_str()", this method never (re-)places variables in the
     *          current scope; the result is always written to the second parameter.
     *
     * Uses \mb_parse_str() when the mbstring support flag is set and the native
     * \parse_str() otherwise.
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str       <p>The input string.</p>
     * @param array  $result    <p>The result will be returned into this reference parameter.</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return bool
     *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result
     */
    public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
    {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (self::$SUPPORT['mbstring'] !== true) {
            /** @noinspection PhpVoidFunctionResultUsedInspection */
            \parse_str($str, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($str, $result);

        if ($parsed === false) {
            return false;
        }

        return $result !== [];
    }
4517
4518
    /**
     * Checks if the "u" pattern modifier, which enables Unicode support in PCRE, is available.
     *
     * The check runs an empty pattern with the "u" modifier against an empty string
     * and suppresses any warning this may raise.
     *
     * @return bool
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     */
    public static function pcre_utf8_support(): bool
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $matchResult = @\preg_match('//u', '');

        return (bool) $matchResult;
    }
4530
4531
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * Each boundary is resolved to a code point: decimal digit strings are taken as
     * the number itself, other hexadecimal strings go through self::hex_to_int(),
     * and anything else through self::ord(). A falsy boundary (including 0 and "0")
     * or a boundary resolving to code point 0 yields an empty array.
     *
     * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     *
     * @throws \RuntimeException if the ctype support flag (self::$SUPPORT['ctype']) is false
     *
     * @return string[]
     */
    public static function range($var1, $var2): array
    {
        if (!$var1 || !$var2) {
            return [];
        }

        if (self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // Resolves one boundary to its code point. Both ctype checks receive the
        // string form of the value: passing non-strings to ctype functions is
        // deprecated as of PHP 8.1.
        $toCodePoint = static function ($boundary): int {
            $asString = (string) $boundary;

            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_digit($asString)) {
                return (int) $boundary;
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_xdigit($asString)) {
                return (int) self::hex_to_int($boundary);
            }

            return self::ord($boundary);
        };

        $start = $toCodePoint($var1);
        if (!$start) {
            return [];
        }

        $end = $toCodePoint($var2);
        if (!$end) {
            return [];
        }

        return \array_map(
            static function (int $i): string {
                return (string) self::chr($i);
            },
            \range($start, $end)
        );
    }
4582
4583
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible. If set to false, exactly one decoding pass
     *                             is performed.</p>
     *
     * @return string
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // fast path: without "&", "%", "+" or "\u" there is nothing to decode
        if (
            \strpos($str, '&') === false
            &&
            \strpos($str, '%') === false
            &&
            \strpos($str, '+') === false
            &&
            \strpos($str, '\u') === false
        ) {
            return self::fix_simple_utf8($str);
        }

        // rewrite "%uXXXX" escapes as hexadecimal html entities, so the entity decoder below handles them
        $pattern = '/%u([0-9a-fA-F]{3,4})/';
        if (\preg_match($pattern, $str)) {
            $str = (string) \preg_replace($pattern, '&#x\\1;', \rawurldecode($str));
        }

        $flags = \ENT_QUOTES | \ENT_HTML5;

        // always decode once; keep decoding until the string is stable only if requested
        // (previously "$multi_decode === false" skipped the decoding completely)
        do {
            $str_compare = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::fix_simple_utf8(
                \rawurldecode(
                    self::html_entity_decode(
                        self::to_utf8($str),
                        $flags
                    )
                )
            );
        } while ($multi_decode === true && $str_compare !== $str);

        return $str;
    }
4647
4648
    /**
     * Runs a Unicode-aware regular expression replacement on $str.
     *
     * The final expression is assembled as: delimiter + pattern + delimiter + "u" + options.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used. The value "msr" is treated as "ms".</p>
     * @param string $delimiter   [optional] <p>Delimiter of the regex. Default: '/'</p>
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // the option string "msr" is mapped to "ms"
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback: an empty (falsy) delimiter is replaced by the default one
        $boundary = $delimiter ?: '/';

        $expression = $boundary . $pattern . $boundary . 'u' . $modifiers;

        return (string) \preg_replace($expression, $replacement, $str);
    }
4681
4682
    /**
     * Deprecated alias: forwards the call to "UTF8::remove_bom()".
     *
     * @see        UTF8::remove_bom()
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the value returned by "UTF8::remove_bom()"
     *
     * @deprecated <p>use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $withoutBom = self::remove_bom($str);

        return $withoutBom;
    }
4697
4698
    /**
     * Strips every known byte order mark found at the beginning of the string.
     *
     * All entries of the internal BOM table are tried in order; after a hit the remaining
     * entries are checked against the already shortened string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string without UTF-BOM
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remainingBytes = \strlen($str);

        foreach (self::$BOM as $marker => $markerBytes) {
            // only a marker at the very first byte counts
            if (\strpos($str, $marker, 0) !== 0) {
                continue;
            }

            $stripped = \substr($str, $markerBytes, $remainingBytes);
            if ($stripped === false) {
                // substr() could not produce a remainder
                return '';
            }

            $remainingBytes -= (int) $markerBytes;
            $str = (string) $stripped;
        }

        return $str;
    }
4727
4728
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Every run of directly repeated $what is collapsed to a single occurrence,
     * e.g. "a   b" with $what = " " becomes "a b".
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @return string the result string with removed duplicates
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what) === true) {
            $what = [$what];
        }

        if (\is_array($what) === true) {
            /** @noinspection ForeachSourceInspection */
            foreach ($what as $item) {
                $item = (string) $item;
                if ($item === '') {
                    // an empty needle cannot have duplicates
                    continue;
                }

                // "$1" is the last captured repetition, i.e. the needle itself; using the raw needle as
                // replacement would interpret "$0", "\0" or "\\" inside it as back-references / escapes
                $str = (string) \preg_replace('/(' . \preg_quote($item, '/') . ')+/u', '$1', $str);
            }
        }

        return $str;
    }
4751
4752
    /**
     * Strips html tags from the string by delegating to "strip_tags()".
     *
     * @param string $str           <p>The input string.</p>
     * @param string $allowableTags [optional] <p>Tags which should not be stripped, e.g. "&lt;b&gt;&lt;i&gt;".
     *                              Default: '' (empty string)</p>
     *
     * @return string
     */
    public static function remove_html(string $str, string $allowableTags = ''): string
    {
        $withoutTags = \strip_tags($str, $allowableTags);

        return $withoutTags;
    }
4766
4767
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * A "\r\n" pair counts as one break, so it is replaced by a single $replacement.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is an empty string.</p>
     *
     * @return string
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // the first alternative must be a plain "\r\n": a leading "/" there would swallow a slash in
        // front of a line break and let "\r" and "\n" be replaced separately
        return (string) \preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
    }
4779
4780
    /**
     * Remove invisible (control) characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * Newline (dec 10), carriage return (dec 13) and horizontal tab (dec 09) are kept.
     * The replacement is repeated until a pass does not change the string anymore.
     *
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * @param string $str         <p>The input string.</p>
     * @param bool   $url_encoded [optional] <p>Also remove the url encoded form (e.g. "%00") of these characters.</p>
     * @param string $replacement [optional] <p>The string that is inserted for every removed match.</p>
     *
     * @return string
     */
    public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
    {
        // raw control characters: 00-08, 11, 12, 14-31, 127
        $rawControlChars = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S';

        $patterns = $url_encoded
            ? [
                '/%0[0-8bcefBCEF]/', // url encoded 00-08, 11, 12, 14, 15
                '/%1[0-9a-fA-F]/', // url encoded 16-31
                $rawControlChars,
            ]
            : [$rawControlChars];

        // removing one match can form a new one (e.g. "%0%000"), so loop until nothing is replaced
        $replacedCount = 0;
        do {
            $str = (string) \preg_replace($patterns, $replacement, $str, -1, $replacedCount);
        } while ($replacedCount !== 0);

        return $str;
    }
4813
4814
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string without the prefix $substring
     */
    public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // compare against '' explicitly: a truthiness check would ignore the valid prefix "0"
        if ($substring === '' || \strpos($str, $substring) !== 0) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
4845
4846
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string having a $str without the suffix $substring
     */
    public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // compare against '' explicitly: a truthiness check would ignore the valid suffix "0"
        if ($substring === '' || \substr($str, -\strlen($substring)) !== $substring) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
4878
4879
    /**
     * Substitutes every occurrence of $search in $str with $replacement.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $search        <p>The needle to search for.</p>
     * @param string $replacement   <p>The string to replace with.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $caseSensitive = true
    ): string {
        if (!$caseSensitive) {
            // case-insensitive matching is delegated to the class' own str_ireplace()
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
4901
4902
    /**
     * Substitutes every occurrence of each $search element in $str with $replacement.
     *
     * @param string       $str           <p>The input string.</p>
     * @param array        $search        <p>The elements to search for.</p>
     * @param array|string $replacement   <p>The string (or the list of strings) to replace with.</p>
     * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $caseSensitive = true
    ): string {
        if (!$caseSensitive) {
            // case-insensitive matching is delegated to the class' own str_ireplace()
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
4924
4925
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * @param string $str                <p>The input string</p>
     * @param string $replacementChar    <p>The replacement character.</p>
     * @param bool   $processInvalidUtf8 <p>Also convert invalid UTF-8 chars (via "mb_convert_encoding()").</p>
     *
     * @return string
     */
    public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
    {
        if ($str === '') {
            return '';
        }

        if ($processInvalidUtf8 === true) {
            // mb_substitute_character() accepts a code point or a keyword like "none", but no literal
            // character, so the replacement has to be converted into its code point first
            // NOTE(review): assumes UTF8::ord() returns the code point of the first character — confirm
            $replacementCharHelper = $replacementChar === '' ? 'none' : self::ord($replacementChar);

            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            $save = \mb_substitute_character();
            \mb_substitute_character($replacementCharHelper);

            try {
                // the polyfill maybe return false, so cast to string
                $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            } finally {
                // the substitute character is global state, so it is restored in any case
                \mb_substitute_character($save);
            }
        }

        // "\xEF\xBF\xBD" is the UTF-8 byte sequence of U+FFFD (the diamond question mark)
        return \str_replace("\xEF\xBF\xBD", $replacementChar, $str);
    }
4971
4972
    /**
     * Strip whitespace or other characters from end of a UTF-8 string.
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped. If null or empty, whitespace is
     *                           stripped.</p>
     *
     * @return string the string with unwanted characters stripped from the right
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // explicit checks instead of truthiness, otherwise the character list "0" would be ignored
        if ($chars !== null && $chars !== '') {
            $chars = \preg_quote($chars, '/');
            $pattern = '[' . $chars . ']+$';
        } else {
            $pattern = '[\s]+$';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
5000
5001
    /**
     * WARNING: Prints (echo) the detected native UTF-8 support (libs), e.g. for debugging.
     *
     * Every entry of the internal support table is written as "key - value" inside a "pre" element.
     *
     * @psalm-suppress MissingReturnType
     */
    public static function showSupport()
    {
        echo '<pre>';

        foreach (self::$SUPPORT as $feature => $state) {
            // print_r() is used, so non-scalar entries are rendered as well
            echo $feature . ' - ' . \print_r($state, true) . "\n<br>";
        }

        echo '</pre>';
    }
5015
5016
    /**
     * Encodes a single UTF-8 character as HTML Numbered Entity like "&#123;".
     *
     * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the HTML numbered entity, the unchanged char (kept ASCII) or an empty string for empty input
     */
    public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($char === '') {
            return '';
        }

        // the ASCII check only runs when the caller asked to keep ASCII chars
        $keepUnchanged = $keepAsciiChars === true && self::is_ascii($char) === true;

        return $keepUnchanged
            ? $char
            : '&#' . self::ord($char, $encoding) . ';';
    }
5041
5042
    /**
     * Replaces every run of $tabLength spaces with one tab character.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength [optional] <p>Number of spaces that form one tab. Default: 4</p>
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tabLength = 4): string
    {
        // the run of spaces that stands for a single tab
        $spaceRun = \str_repeat(' ', $tabLength);

        return \str_replace($spaceRun, "\t", $str);
    }
5060
5061
    /**
     * Convert a string to an array of Unicode characters.
     *
     * Strategy: "mb_substr()" / a "u" regex if mbstring may be used, a "u" regex if only PCRE
     * has UTF-8 support, otherwise a manual byte-wise UTF-8 decoder.
     *
     * @param int|int[]|string|string[] $str                <p>The string to split into array. If an array is
     *                                                      given, every element is split on its own.</p>
     * @param int                       $length             [optional] <p>Max character length of each array
     *                                                      element.</p>
     * @param bool                      $cleanUtf8          [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool                      $tryToUseMbFunction [optional] <p>Set to false, if you don't want to use
     *                                                      "mb_substr"</p>
     *
     * @return array
     *               <p>An array containing chunks of the input.</p>
     */
    public static function str_split(
        $str,
        int $length = 1,
        bool $cleanUtf8 = false,
        bool $tryToUseMbFunction = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        if (\is_array($str) === true) {
            // assign by key instead of iterating by reference, so no dangling reference is left behind
            foreach ($str as $k => $v) {
                $str[$k] = self::str_split(
                    $v,
                    $length,
                    $cleanUtf8,
                    $tryToUseMbFunction
                );
            }

            return $str;
        }

        // init
        $str = (string) $str;

        if ($str === '') {
            return [];
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (
            $tryToUseMbFunction === true
            &&
            self::$SUPPORT['mbstring'] === true
        ) {
            $iMax = \mb_strlen($str);
            if ($iMax <= 127) {
                // short strings: fetch the characters one by one
                $ret = [];
                for ($i = 0; $i < $iMax; ++$i) {
                    $ret[] = \mb_substr($str, $i, 1);
                }
            } else {
                // longer strings: a single regex pass
                $retArray = [];
                \preg_match_all('/./us', $str, $retArray);
                $ret = $retArray[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $retArray = [];
            \preg_match_all('/./us', $str, $retArray);
            $ret = $retArray[0] ?? [];
        } else {

            // fallback: decode the UTF-8 byte sequences manually

            $ret = [];
            $len = \strlen($str);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($str[$i] & "\x80") === "\x00") {
                    // 0xxxxxxx: single byte (ASCII)
                    $ret[] = $str[$i];
                } elseif (
                    isset($str[$i + 1])
                    &&
                    ($str[$i] & "\xE0") === "\xC0"
                ) {
                    // 110xxxxx: lead byte of a two byte sequence, kept only with a valid continuation byte
                    if (($str[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $str[$i] . $str[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($str[$i + 2])
                    &&
                    ($str[$i] & "\xF0") === "\xE0"
                ) {
                    // 1110xxxx: lead byte of a three byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($str[$i + 3])
                    &&
                    ($str[$i] & "\xF8") === "\xF0"
                ) {
                    // 11110xxx: lead byte of a four byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            // join the single characters into chunks of $length characters;
            // the callback takes its argument by value, a by-reference parameter triggers a warning on PHP 8
            return \array_map(
                static function (array $item): string {
                    return \implode('', $item);
                },
                \array_chunk($ret, $length)
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
5200
5201
    /**
     * Returns a camelCase version of the string. Surrounding spaces are trimmed, the first character
     * is lower-cased, leading dashes / underscores are dropped, the character following a run of
     * spaces, dashes or underscores is upper-cased (the run itself is removed) and the character
     * following a run of digits is upper-cased as well.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = self::lcfirst(\trim($str), $encoding, false, $lang, $tryToKeepStringLength);
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // the plain mb_* function is sufficient as long as no language / length specials are requested
        $useMbFunction = $lang === null && $tryToKeepStringLength === false;

        // shared upper-casing used by both replacement passes below
        $toUpper = static function (string $text, bool $clean) use ($useMbFunction, $encoding, $lang, $tryToKeepStringLength): string {
            if ($useMbFunction !== true) {
                return self::strtoupper($text, $encoding, $clean, $lang, $tryToKeepStringLength);
            }

            return $encoding === 'UTF-8'
                ? \mb_strtoupper($text)
                : \mb_strtoupper($text, $encoding);
        };

        // pass 1: drop separator runs and upper-case the character right after them
        $withoutSeparators = (string) \preg_replace_callback(
            '/[-_\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($toUpper): string {
                return isset($match[1]) ? $toUpper($match[1], false) : '';
            },
            $str
        );

        // pass 2: upper-case the character right after a run of digits (the digits are kept)
        return (string) \preg_replace_callback(
            '/[\d]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($toUpper, $cleanUtf8): string {
                return $toUpper($match[0], $cleanUtf8);
            },
            $withoutSeparators
        );
    }
5286
5287
    /**
     * Capitalizes the first letter of each word, except for words that are part of a name
     * and shouldn't be capitalized.
     *
     * The whitespace is collapsed first, afterwards the name helper is applied for words
     * separated by spaces and then for words separated by dashes.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string with $str capitalized
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);

        // first pass: words separated by a space
        $spaceSeparated = self::str_capitalize_name_helper($collapsed, ' ');

        // second pass: words separated by a dash
        return self::str_capitalize_name_helper($spaceSeparated, '-');
    }
5305
5306
    /**
     * Tells whether $haystack contains $needle. The comparison is case-sensitive by
     * default and can be made insensitive by setting $caseSensitive to false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>Substring to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool whether or not $haystack contains $needle
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $caseSensitive = true
    ): bool {
        // byte-wise search for the exact needle, multibyte-aware search when the case is ignored
        $position = $caseSensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        return $position !== false;
    }
5328
5329
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $caseSensitive to false.
     *
     * An empty haystack, an empty list of needles or an empty needle always results in false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool whether or not $haystack contains every one of the $needles
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $caseSensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // compare against '' explicitly, so that the needle "0" is still searched for
            if ($needle === '') {
                return false;
            }

            $position = $caseSensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            // one missing needle is enough to fail; otherwise keep checking the remaining needles
            if ($position === false) {
                return false;
            }
        }

        return true;
    }
5364
5365
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $caseSensitive to false.
     *
     * Empty needles are skipped; an empty haystack or an empty list of needles results in false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool
     *              Whether or not $haystack contains at least one of the $needles
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $caseSensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // an empty needle can never count as a hit, continue with the next one
            if ($needle === '') {
                continue;
            }

            $position = $caseSensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            // the first hit decides; a miss must not end the search for the remaining needles
            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
5401
5402
    /**
     * Converts the string into its lowercase, trimmed, dash-separated form.
     *
     * This is a shortcut for self::str_delimit() with "-" as delimiter.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $dasherized = self::str_delimit($str, '-', $encoding);

        return $dasherized;
    }
5416
5417
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     *
     * The conversion runs in three steps:
     * 1. a "-" is put in front of every uppercase letter that is not at a word boundary
     * 2. the string is converted to lowercase
     * 3. every run of dashes, underscores and whitespace is replaced by $delimiter
     *
     * The delimiter itself is inserted after the lowercase step, so it is not lowercased.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $delimiter             <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                           tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                           ß</p>
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $hasMbString = self::$SUPPORT['mbstring'] === true;
        $str = \trim($str);

        // step 1: mark the inner uppercase letters
        if ($hasMbString) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $str = (string) \mb_ereg_replace('\B(\p{Lu})', '-\1', $str);
        } else {
            $str = (string) \preg_replace('/\B(\p{Lu})/u', '-\1', $str);
        }

        // step 2: lowercase, the plain "mb_strtolower()" is only used for the simple UTF-8 case
        $simpleLowerCase = $lang === null && $tryToKeepStringLength === false && $encoding === 'UTF-8';
        if ($simpleLowerCase) {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
        }

        // step 3: collapse the separators into the requested delimiter
        if ($hasMbString) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[-_\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[-_\s]+/u', $delimiter, $str);
    }
5468
5469
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * The checks are done in this order: binary (UTF-16 / UTF-32), ASCII, UTF-8,
     * "mb_detect_encoding()" with a fixed candidate list and finally an
     * "iconv()" round-trip over the encodings from self::$ENCODINGS.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     */
    public static function str_detect_encoding($str)
    {
        $input = (string) $str;

        //
        // 1.) binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        if (self::is_binary($input, true) === true) {
            // the helpers report 1 for little-endian and 2 for big-endian
            $byteOrders = [1 => 'LE', 2 => 'BE'];

            $utf16 = self::is_utf16($input, false);
            if (\is_int($utf16) && isset($byteOrders[$utf16])) {
                return 'UTF-16' . $byteOrders[$utf16];
            }

            $utf32 = self::is_utf32($input, false);
            if (\is_int($utf32) && isset($byteOrders[$utf32])) {
                return 'UTF-32' . $byteOrders[$utf32];
            }

            // is binary but not "UTF-16" or "UTF-32"
            return false;
        }

        //
        // 2.) simple check for ASCII chars
        //

        if (self::is_ascii($input) === true) {
            return 'ASCII';
        }

        //
        // 3.) simple check for UTF-8 chars
        //

        if (self::is_utf8($input) === true) {
            return 'UTF-8';
        }

        //
        // 4.) check via "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            $candidates = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($input, $candidates, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) check via "iconv()"
        //

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // a string that survives the conversion onto itself unchanged is accepted as that encoding
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $roundTrip = (string) @\iconv($candidate, $candidate . '//IGNORE', $input);
            if ($roundTrip === $input) {
                return $candidate;
            }
        }

        return false;
    }
5590
5591
    /**
     * Check if the string ends with the given substring (case-sensitive, byte-wise comparison).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        $needleLength = \strlen($needle);

        // cut as many bytes from the end as the needle has and compare them
        $tail = \substr($haystack, -$needleLength);

        return $tail === $needle;
    }
5603
5604
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty $substrings list never matches
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with one of the $substrings
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        foreach ($substrings as $candidate) {
            $tail = \substr($str, -\strlen($candidate));
            if ($tail === $candidate) {
                return true;
            }
        }

        return false;
    }
5628
5629
    /**
     * Ensures that the string begins with $substring. If it doesn't, it's
     * prepended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        $alreadyPrefixed = $substring !== '' && \strpos($str, $substring) === 0;

        return $alreadyPrefixed ? $str : $substring . $str;
    }
5650
5651
    /**
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        $alreadySuffixed = $str !== ''
                           &&
                           $substring !== ''
                           &&
                           \substr($str, -\strlen($substring)) === $substring;

        if ($alreadySuffixed) {
            return $str;
        }

        return $str . $substring;
    }
5673
5674
    /**
     * Strips '_id', replaces the remaining underscores with spaces, trims the
     * result and passes it through self::ucfirst().
     *
     * @param string $str
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        // the order matters: '_id' has to be removed before the single '_' is replaced
        $withoutId = \str_replace('_id', '', $str);
        $spaced = \str_replace('_', ' ', $withoutId);

        return self::ucfirst(\trim($spaced));
    }
5698
5699
    /**
     * Check if the string ends with the given substring, case insensitive.
     *
     * An empty $haystack or an empty $needle never matches.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // take as many bytes from the end as the needle has
        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
5715
5716
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive (every candidate is checked via self::str_iends_with())
     * - an empty $substrings list never matches
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with one of the $substrings
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        foreach ($substrings as $candidate) {
            if (self::str_iends_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
5740
5741
    /**
     * Returns the index of the first occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search.
     *
     * Thin wrapper around self::stripos().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::stripos($str, $needle, $offset, $encoding);
    }
5767
5768
    /**
     * Returns the index of the last occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. Offsets may be negative to count from the last character
     * in the string.
     *
     * Thin wrapper around self::strripos().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strripos($str, $needle, $offset, $encoding);
    }
5795
5796
    /**
     * Returns the index of the first occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search.
     *
     * Thin wrapper around self::strpos().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strpos($str, $needle, $offset, $encoding);
    }
5822
5823
    /**
     * Returns the index of the last occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. Offsets may be negative to count from the last character
     * in the string.
     *
     * Thin wrapper around self::strrpos().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strrpos($str, $needle, $offset, $encoding);
    }
5850
5851
    /**
     * Inserts $substring into the string at the $index provided.
     *
     * The string is returned unchanged if $index is greater than its length.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        // fast path: use the native "mb_" functions directly
        if ($encoding === 'UTF-8') {
            $length = (int) \mb_strlen($str);
            if ($index > $length) {
                return $str;
            }

            $head = (string) \mb_substr($str, 0, $index);
            $tail = (string) \mb_substr($str, $index, $length);

            return $head . $substring . $tail;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $length = (int) self::strlen($str, $encoding);
        if ($index > $length) {
            return $str;
        }

        $head = (string) self::substr($str, 0, $index, $encoding);
        $tail = (string) self::substr($str, $index, $length, $encoding);

        return $head . $substring . $tail;
    }
5890
5891
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * The replacement is done via "preg_replace()" with quoted search strings
     * and the "ui" modifiers. Replacements are inserted literally: "$" and "\"
     * in $replace are escaped, so they are never treated as back-references.
     *
     * @see  http://php.net/manual/en/function.str-ireplace.php
     *
     * @param mixed $search  <p>
     *                       Every replacement with search array is
     *                       performed on the result of previous replacement.
     *                       An empty search string never matches.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement string or an array of replacement strings.
     *                       </p>
     * @param mixed $subject <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] <p>
     *                       The number of matched and replaced needles will
     *                       be returned in count which is passed by
     *                       reference.
     *                       </p>
     *
     * @return mixed a string or an array of replacements
     */
    public static function str_ireplace($search, $replace, $subject, &$count = null)
    {
        // build one case-insensitive unicode pattern per search string
        $patterns = [];
        foreach ((array) $search as $key => $term) {
            $term = (string) $term;
            $patterns[$key] = $term === ''
                ? '/^(?<=.)$/' // can never match, so an empty search string replaces nothing
                : '/' . \preg_quote($term, '/') . '/ui';
        }

        // "preg_replace()" would interpret "$1", "\1" or "\\" inside of the replacement,
        // but the replacement has to be used as literal text, like in "str_ireplace()"
        $escape = static function ($value): string {
            return \strtr((string) $value, ['\\' => '\\\\', '$' => '\\$']);
        };
        $replacement = \is_array($replace) ? \array_map($escape, $replace) : $escape($replace);

        return \preg_replace($patterns, $replacement, $subject, -1, $count);
    }
5935
5936
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * The comparison is done with the byte-oriented "stripos()". If $search is
     * empty, $replacement is appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        // nothing to search for: the replacement is simply added
        // (this also covers an empty $str, where only the replacement is left)
        if ($search === '') {
            return $str . $replacement;
        }

        $matchesAtStart = \stripos($str, $search) === 0;
        if (!$matchesAtStart) {
            return $str;
        }

        return $replacement . \substr($str, \strlen($search));
    }
5967
5968
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * The comparison is done with the byte-oriented "stripos()". If $search is
     * empty, $replacement is appended to $str. If $search is longer than $str,
     * the string is returned unchanged.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        // nothing to search for: the replacement is simply added
        // (this also covers an empty $str, where only the replacement is left)
        if ($search === '') {
            return $str . $replacement;
        }

        // position at which $search has to start to be the ending of $str
        $offset = \strlen($str) - \strlen($search);

        // $search can not fit into $str, and a negative offset beyond the string
        // start would trigger an error in "stripos()"
        if ($offset < 0) {
            return $str;
        }

        if (\stripos($str, $search, $offset) !== false) {
            return \substr($str, 0, $offset) . $replacement;
        }

        return $str;
    }
5999
6000
    /**
     * Check if the string starts with the given substring, case insensitive.
     *
     * An empty $haystack or an empty $needle never matches.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        $comparable = $haystack !== '' && $needle !== '';

        return $comparable && self::stripos($haystack, $needle) === 0;
    }
6016
6017
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive (every candidate is checked via self::str_istarts_with())
     * - an empty $str or an empty $substrings list never matches
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with one of the $substrings
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '') {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_istarts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
6045
6046
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * The separator is located via self::str_iindex_first(). An empty string
     * is returned if $str or $separator is empty, or if the separator was not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // the offset must be calculated with the same encoding that is used for the cut below
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6080
6081
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * The separator is located via self::strripos(). An empty string is
     * returned if $str or $separator is empty, or if the separator was not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // the offset must be calculated with the same encoding that is used for the cut below
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6115
6116
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * The separator is located via self::str_iindex_first(). An empty string
     * is returned if $str or $separator is empty, or if the separator was not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // the offset must be calculated with the same encoding that is used for the cut below
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6142
6143
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator was not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        // UTF-8 is handled by the native "mb_" functions, everything else by the class helpers
        $isUtf8 = $encoding === 'UTF-8';

        $offset = $isUtf8
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);

        if ($offset === false) {
            return '';
        }

        return $isUtf8
            ? (string) \mb_substr($str, 0, $offset)
            : (string) self::substr($str, 0, $offset, $encoding);
    }
6174
6175
    /**
     * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
     *
     * The work is done by self::stristr(); its "false" result is converted
     * into an empty string.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $part = self::stristr($str, $needle, $beforeNeedle, $encoding);

        return $part === false ? '' : $part;
    }
6211
6212
    /**
     * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
     *
     * The work is done by self::strrichr(); its "false" result is converted
     * into an empty string.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $part = self::strrichr($str, $needle, $beforeNeedle, $encoding);

        return $part === false ? '' : $part;
    }
6243
6244
    /**
     * Returns the last $n characters of the string.
     *
     * An empty string is returned for an empty $str or if $n is not positive.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        if ($n <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $encoding);
        }

        // fast path: use the native "mb_" function directly
        return (string) \mb_substr($str, -$n);
    }
6267
6268
    /**
     * Limit the number of characters in a string.
     *
     * A string that is longer than $length is cut so that the result,
     * including $strAddOn, has $length characters. If $strAddOn itself is
     * longer than $length, only $strAddOn is returned.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $strAddOn = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // a negative length would cut from the end of the string and keep too much of it
            $keep = $length - (int) \mb_strlen($strAddOn);
            if ($keep < 0) {
                $keep = 0;
            }

            return ((string) \mb_substr($str, 0, $keep)) . $strAddOn;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // measure the add-on with the same encoding as the string itself
        $keep = $length - (int) self::strlen($strAddOn, $encoding);
        if ($keep < 0) {
            $keep = 0;
        }

        return ((string) self::substr($str, 0, $keep, $encoding)) . $strAddOn;
    }
6305
6306
    /**
     * Limit the number of characters in a string, but also after the next word.
     *
     * A string of at most $length characters is returned unchanged. Otherwise
     * the string is cut at $length characters and the trailing, possibly
     * partial, word is dropped before $strAddOn is appended. If no space is
     * left inside the cut, the first $length - 1 characters are kept instead.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $strAddOn = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($length <= 0 || $str === '') {
            return '';
        }

        $isUtf8 = $encoding === 'UTF-8';

        $charCount = $isUtf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        if ($charCount <= $length) {
            return $str;
        }

        // Encoding-aware substring: native mbstring for UTF-8, library helper otherwise.
        $cut = static function ($text, int $start, int $size) use ($isUtf8, $encoding) {
            return $isUtf8
                ? \mb_substr($text, $start, $size)
                : self::substr($text, $start, $size, $encoding);
        };

        // The limit falls exactly on a space: cut right before it.
        if ($cut($str, $length - 1, 1) === ' ') {
            return ((string) $cut($str, 0, $length - 1)) . $strAddOn;
        }

        $head = $cut($str, 0, $length);
        if (!$isUtf8 && $head === false) {
            return $strAddOn;
        }

        // A limit of -1 drops the last (possibly incomplete) word.
        $kept = \implode(' ', \explode(' ', $head, -1));

        if ($kept === '') {
            return ((string) $cut($head, 0, $length - 1)) . $strAddOn;
        }

        return $kept . $strAddOn;
    }
6370
6371
    /**
     * Returns the longest common prefix between the string and $otherStr.
     *
     * Both strings are compared character by character from the start; the
     * comparison stops at the first position where they differ.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        $isUtf8 = $encoding === 'UTF-8';
        if (!$isUtf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Encoding-aware helpers: native mbstring for UTF-8, library helpers otherwise.
        $lengthOf = static function (string $text) use ($isUtf8, $encoding) {
            return $isUtf8 ? \mb_strlen($text) : self::strlen($text, $encoding);
        };
        $charAt = static function (string $text, int $position) use ($isUtf8, $encoding) {
            return $isUtf8
                ? \mb_substr($text, $position, 1)
                : self::substr($text, $position, 1, $encoding);
        };

        // The prefix cannot be longer than the shorter of the two strings.
        $limit = (int) \min($lengthOf($str), $lengthOf($otherStr));

        $prefix = '';
        $position = 0;
        while ($position < $limit) {
            $current = $charAt($str, $position);

            if ($current === false || $current !== $charAt($otherStr, $position)) {
                break;
            }

            $prefix .= $current;
            ++$position;
        }

        return $prefix;
    }
6429
6430
    /**
     * Returns the longest common substring between the string and $otherStr.
     * In the case of ties, it returns that which occurs first.
     *
     * Uses the dynamic-programming solution described at
     * http://en.wikipedia.org/wiki/Longest_common_substring_problem
     * but keeps only the previous and the current table row, and extracts
     * every character once instead of once per inner-loop iteration.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the longest common substring, or an empty string if there is none
     */
    public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $otherStr === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $strLength = (int) \mb_strlen($str);
            $otherLength = (int) \mb_strlen($otherStr);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $strLength = (int) self::strlen($str, $encoding);
            $otherLength = (int) self::strlen($otherStr, $encoding);
        }

        // Return if either string is empty
        if ($strLength === 0 || $otherLength === 0) {
            return '';
        }

        // Decided AFTER normalization, so an alias that normalizes to "UTF-8"
        // takes the native mbstring path below.
        $useMb = $encoding === 'UTF-8';

        $charAt = static function (string $text, int $position) use ($useMb, $encoding) {
            return $useMb
                ? \mb_substr($text, $position, 1)
                : self::substr($text, $position, 1, $encoding);
        };

        // Extract the characters of $otherStr once; the inner loop only reads them.
        $otherChars = [];
        for ($j = 0; $j < $otherLength; ++$j) {
            $otherChars[$j] = $charAt($otherStr, $j);
        }

        $len = 0; // length of the best match found so far
        $end = 0; // 1-based end position of that match inside $str

        // $previousRow[$j] = length of the common run ending at $str[$i - 2] / $otherStr[$j - 1]
        $previousRow = \array_fill(0, $otherLength + 1, 0);

        for ($i = 1; $i <= $strLength; ++$i) {
            $strChar = $charAt($str, $i - 1);
            $currentRow = [0];

            for ($j = 1; $j <= $otherLength; ++$j) {
                if ($strChar === $otherChars[$j - 1]) {
                    $run = $previousRow[$j - 1] + 1;
                    $currentRow[$j] = $run;

                    // Strict ">" keeps the first of several equally long matches.
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $currentRow[$j] = 0;
                }
            }

            $previousRow = $currentRow;
        }

        if ($useMb) {
            return (string) \mb_substr($str, $end - $len, $len);
        }

        return (string) self::substr($str, $end - $len, $len, $encoding);
    }
6514
6515
    /**
     * Returns the longest common suffix between the string and $otherStr.
     *
     * Both strings are compared character by character from the end; the
     * comparison stops at the first position where they differ.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        if ($otherStr === '' || $str === '') {
            return '';
        }

        $isUtf8 = $encoding === 'UTF-8';
        if (!$isUtf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Encoding-aware helpers: native mbstring for UTF-8, library helpers otherwise.
        $lengthOf = static function (string $text) use ($isUtf8, $encoding) {
            return $isUtf8 ? \mb_strlen($text, $encoding) : self::strlen($text, $encoding);
        };
        $charFromEnd = static function (string $text, int $distance) use ($isUtf8, $encoding) {
            return $isUtf8
                ? \mb_substr($text, -$distance, 1)
                : self::substr($text, -$distance, 1, $encoding);
        };

        // The suffix cannot be longer than the shorter of the two strings.
        $limit = (int) \min($lengthOf($str), $lengthOf($otherStr));

        $suffix = '';
        $distance = 1;
        while ($distance <= $limit) {
            $current = $charFromEnd($str, $distance);

            if ($current === false || $current !== $charFromEnd($otherStr, $distance)) {
                break;
            }

            // Walking backwards, so each matching character is prepended.
            $suffix = $current . $suffix;
            ++$distance;
        }

        return $suffix;
    }
6576
6577
    /**
     * Returns true if $str matches the supplied pattern, false otherwise.
     *
     * The pattern is wrapped in "/" delimiters and given the "u" modifier
     * here, so pass it WITHOUT delimiters and escape any literal "/" in it.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against.</p>
     *
     * @return bool whether or not $str matches the pattern
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        $regex = '/' . $pattern . '/u';

        // preg_match() yields 1 on a match, 0 on no match and false on error.
        return \preg_match($regex, $str) === 1;
    }
6589
6590
    /**
     * Returns whether or not a character exists at an index. Offsets may be
     * negative to count from the last character in the string. Implements
     * part of the ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool whether or not the index exists
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $length = (int) self::strlen($str, $encoding);

        // Negative offsets reach back at most $length characters;
        // non-negative offsets must lie strictly below $length.
        return $offset < 0
            ? $length >= \abs($offset)
            : $length > $offset;
    }
6612
6613
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string the character at the specified index
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Measure with the caller's encoding, so the bounds check agrees with
        // char_at() below and with str_offset_exists().
        $length = (int) self::strlen($str, $encoding);

        if (
            ($index >= 0 && $length <= $index)
            ||
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6642
6643
    /**
     * Pad a UTF-8 string to given length with another string.
     *
     * The input is returned unchanged when $pad_length is 0, when $pad_string
     * is empty, or when the input already has more than $pad_length characters.
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of "left", "right", "both"
     *
     * @return string returns the padded string
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_string === '' || $pad_length === 0) {
            return $str;
        }

        // Anything that is not already an integer must be one of the textual aliases.
        if ($pad_type !== (int) $pad_type) {
            $aliases = [
                'left'  => \STR_PAD_LEFT,
                'right' => \STR_PAD_RIGHT,
                'both'  => \STR_PAD_BOTH,
            ];

            if (!\is_string($pad_type) || !isset($aliases[$pad_type])) {
                throw new \InvalidArgumentException(
                    'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }

            $pad_type = $aliases[$pad_type];
        }

        // The fast path is chosen on the encoding exactly as passed in;
        // any other value is normalized and handled by the library helpers.
        $isUtf8 = $encoding === 'UTF-8';
        if (!$isUtf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $measure = static function (string $text) use ($isUtf8, $encoding): int {
            return $isUtf8
                ? (int) \mb_strlen($text)
                : (int) self::strlen($text, $encoding);
        };
        $take = static function (string $text, int $size) use ($isUtf8, $encoding): string {
            return $isUtf8
                ? (string) \mb_substr($text, 0, $size)
                : (string) self::substr($text, 0, $size, $encoding);
        };

        $str_length = $measure($str);
        if ($pad_length < $str_length) {
            return $str;
        }

        // Number of padding characters still missing.
        $diff = $pad_length - $str_length;

        switch ($pad_type) {
            case \STR_PAD_LEFT:
                $ps_length = $measure($pad_string);

                $pre = $take(
                    \str_repeat($pad_string, (int) \ceil($diff / $ps_length)),
                    $diff
                );
                $post = '';

                break;

            case \STR_PAD_BOTH:
                // With an odd gap the extra character goes to the right side.
                $ps_length_left = (int) \floor($diff / 2);
                $ps_length_right = (int) \ceil($diff / 2);

                $pre = $take(\str_repeat($pad_string, $ps_length_left), $ps_length_left);
                $post = $take(\str_repeat($pad_string, $ps_length_right), $ps_length_right);

                break;

            case \STR_PAD_RIGHT:
            default:
                $ps_length = $measure($pad_string);

                $post = $take(
                    \str_repeat($pad_string, (int) \ceil($diff / $ps_length)),
                    $diff
                );
                $pre = '';
        }

        return $pre . $str . $post;
    }
6805
6806
    /**
     * Pads both sides of the string up to the given length.
     * Delegates to str_pad() with a pad type of STR_PAD_BOTH.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with padding applied
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad(
            $str,
            $length,
            $padStr,
            \STR_PAD_BOTH,
            $encoding
        );
    }
6825
6826
    /**
     * Pads the beginning of the string up to the given length.
     * Delegates to str_pad() with a pad type of STR_PAD_LEFT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with left padding
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad(
            $str,
            $length,
            $padStr,
            \STR_PAD_LEFT,
            $encoding
        );
    }
6845
6846
    /**
     * Pads the end of the string up to the given length.
     * Delegates to str_pad() with a pad type of STR_PAD_RIGHT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with right padding
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad(
            $str,
            $length,
            $padStr,
            \STR_PAD_RIGHT,
            $encoding
        );
    }
6865
6866
    /**
     * Repeat a string.
     *
     * The input is passed through self::filter() before it is repeated.
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of time the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @return string the repeated string
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
6890
6891
    /**
     * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param mixed $search  <p>
     *                       The value being searched for, otherwise known as the needle.
     *                       An array may be used to designate multiple needles.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value that replaces found search
     *                       values. An array may be used to designate multiple replacements.
     *                       </p>
     * @param mixed $subject <p>
     *                       The string or array being searched and replaced on,
     *                       otherwise known as the haystack.
     *                       </p>
     *                       <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles
     *
     * @return mixed this function returns a string or an array with the replaced values
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        /**
         * @psalm-suppress PossiblyNullArgument
         */
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
6936
6937
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * If $str does not start with $search, $str is returned unchanged.
     * An empty $search returns $str with $replacement appended.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_beginning(string $str, string $search, string $replacement): string
    {
        // Covers every empty-$search case, including an empty $str.
        if ($search === '') {
            return $str . $replacement;
        }

        $searchLength = \strlen($search);

        // Byte-wise prefix comparison; no match leaves the input untouched.
        if (\strncmp($str, $search, $searchLength) !== 0) {
            return $str;
        }

        return $replacement . \substr($str, $searchLength);
    }
6968
6969
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * If $str does not end with $search (including the case where $search is
     * longer than $str), $str is returned unchanged. An empty $search returns
     * $str with $replacement appended.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_ending(string $str, string $search, string $replacement): string
    {
        // Covers every empty-$search case, including an empty $str.
        if ($search === '') {
            return $str . $replacement;
        }

        $searchLength = \strlen($search);

        // Check the length first: a $search longer than $str can never match,
        // and the tail must only be read when it fits inside $str. (Searching
        // with strpos() from a negative, out-of-range offset raises a warning
        // on PHP 7 and a ValueError on PHP 8.)
        if (
            $searchLength > \strlen($str)
            ||
            \substr($str, -$searchLength) !== $search
        ) {
            return $str;
        }

        return \substr($str, 0, -$searchLength) . $replacement;
    }
7000
7001
    /**
     * Replace the first "$search"-term with the "$replace"-term.
     *
     * If "$search" is not found, "$subject" is returned unchanged.
     *
     * @param string $search
     * @param string $replace
     * @param string $subject
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(string $search, string $replace, string $subject): string
    {
        $firstPos = self::strpos($subject, $search);

        if ($firstPos === false) {
            return $subject;
        }

        // Swap exactly the characters occupied by the first match.
        $searchLength = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $firstPos, $searchLength);
    }
7030
7031
    /**
     * Replace the last "$search"-term with the "$replace"-term.
     *
     * If "$search" is not found, "$subject" is returned unchanged.
     *
     * @param string $search
     * @param string $replace
     * @param string $subject
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $lastPos = self::strrpos($subject, $search);

        if ($lastPos === false) {
            return $subject;
        }

        // Swap exactly the characters occupied by the last match.
        $searchLength = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $lastPos, $searchLength);
    }
7062
7063
    /**
     * Shuffles all the characters in the string.
     *
     * PS: uses random algorithm which is weak for cryptography purposes
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the shuffled string
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        $isUtf8 = $encoding === 'UTF-8';
        if (!$isUtf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $charCount = $isUtf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        // Shuffle the character positions, then rebuild the string in that order.
        $positions = \range(0, $charCount - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($positions);

        $shuffledStr = '';

        foreach ($positions as $position) {
            $char = $isUtf8
                ? \mb_substr($str, $position, 1)
                : self::substr($str, $position, 1, $encoding);

            if ($char !== false) {
                $shuffledStr .= $char;
            }
        }

        return $shuffledStr;
    }
7109
7110
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. If $start or $end is negative, it is computed from
     * the end of the string.
     *
     * If the (resolved) $end does not lie behind the (resolved) $start, an empty
     * string is returned.
     *
     * @param string $str
     * @param int    $start    <p>Initial index from which to begin extraction.</p>
     * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      <p>The extracted substring.</p><p>Whatever the underlying substr
     *                      function returns is passed through, so <b>FALSE</b> is possible
     *                      if that function reports a failure.</p>
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        $isUtf8 = $encoding === 'UTF-8';
        if (!$isUtf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($end === null) {
            // no end given: the full string length is always "long enough"
            $length = $isUtf8 ? (int) \mb_strlen($str) : (int) self::strlen($str, $encoding);
        } else {
            $startIndex = $start;
            $endIndex = $end;

            // Resolve negative indexes into absolute ones. The string length
            // is only needed (and only computed) in this case.
            if ($start < 0 || $end < 0) {
                $strLength = $isUtf8 ? (int) \mb_strlen($str) : (int) self::strlen($str, $encoding);

                if ($start < 0) {
                    $startIndex = \max(0, $strLength + $start);
                }

                if ($end < 0) {
                    $endIndex = $strLength + $end;
                }
            }

            // An empty or inverted range must not reach the substr call:
            // a negative length would mean "omit characters from the end".
            if ($endIndex <= $startIndex) {
                return '';
            }

            $length = $endIndex - $startIndex;
        }

        if ($isUtf8) {
            return \mb_substr($str, $start, $length);
        }

        return self::substr($str, $start, $length, $encoding);
    }
7159
7160
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * - "-" is replaced by "_"
     * - every digit is surrounded by "_"
     * - every upper-case letter is lower-cased and prefixed with "_"
     * - whitespace is replaced by "_", repeated "_" are collapsed and
     *   leading / trailing "_" are removed
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string in snake_case
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // NOTE: there must be no "|" inside the character class, it would
            //       be matched literally and e.g. "a|b" would become "a_|b"
            '/([\d\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $matchInt = (int) $match;

                // a plain digit: separate it from both neighbours
                if ((string) $matchInt === $match) {
                    return '_' . $match . '_';
                }

                // an upper-case letter: starts a new word
                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        $str = (string) \preg_replace(
            [
                '/\s+/u',        // convert spaces to "_"
                '/^\s+|\s+$/u',  // trim leading & trailing spaces
                '/_+/',         // remove double "_"
            ],
            [
                '_',
                '',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
7224
7225
    /**
     * Sort all characters of a string by their code points.
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @return string string of sorted characters
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $codePoints = self::codepoints($str);

        if ($unique) {
            // flipping twice collapses duplicated values into a single entry
            $codePoints = \array_flip(\array_flip($codePoints));
        }

        if (!$desc) {
            \asort($codePoints);
        } else {
            \arsort($codePoints);
        }

        return self::string($codePoints);
    }
7250
7251
    /**
     * Alias of "UTF8::str_split()": all arguments are forwarded unchanged.
     *
     * @see UTF8::str_split()
     *
     * @param string|string[] $str
     * @param int             $length
     * @param bool            $cleanUtf8
     *
     * @return string[]
     */
    public static function split(
        $str,
        int $length = 1,
        bool $cleanUtf8 = false
    ): array {
        $chunks = self::str_split($str, $length, $cleanUtf8);

        return $chunks;
    }
7269
7270
    /**
     * Splits the string with the provided regular expression, returning an
     * array of strings. An optional integer $limit will truncate the
     * results.
     *
     * @param string $str
     * @param string $pattern <p>The regex (without delimiters) with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @return string[] an array of strings; an empty array if $limit is 0 or if the split failed
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $parts = \mb_split($pattern, $str);

            // "mb_split()" reports a failure (e.g. a broken pattern) via "false"
            if ($parts === false) {
                return [];
            }

            // truncate the result instead of keeping the unsplit rest in the last item
            return $limit > 0 ? \array_slice($parts, 0, $limit) : $parts;
        }

        // Fallback without "mbstring": the pattern is a regex here as well, so
        // it must not be quoted. "\x01" is used as delimiter so that every
        // printable character (e.g. "/") can be used unescaped in the pattern.
        $regex = "\x01" . $pattern . "\x01u";

        if ($limit > 0) {
            // ask for one more item: it receives the unsplit rest, which is dropped again
            $parts = \preg_split($regex, $str, $limit + 1);

            return $parts === false ? [] : \array_slice($parts, 0, $limit);
        }

        $parts = \preg_split($regex, $str, -1);

        return $parts === false ? [] : $parts;
    }
7331
7332
    /**
     * Check if the string starts with the given substring.
     *
     * - case-sensitive, byte-wise comparison
     * - an empty $needle always matches (same as the native "str_starts_with()" of PHP 8)
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        // compare only the prefix instead of searching the whole haystack
        return \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
7344
7345
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty $str or an empty $substrings list never matches
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with one of the $substrings
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_starts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
7373
7374
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty or if the
     * separator was not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // Use the class helper (as the other "*_separator" methods do) instead
        // of calling "\mb_substr()" directly with the raw $encoding.
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
7413
7414
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty or if the
     * separator was not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $position = self::strrpos($str, $separator, 0, $encoding);
            if ($position === false) {
                return '';
            }

            $from = $position + (int) self::strlen($separator, $encoding);

            return (string) self::substr($str, $from, null, $encoding);
        }

        $position = \mb_strrpos($str, $separator);
        if ($position === false) {
            return '';
        }

        // continue right behind the separator
        $from = $position + (int) \mb_strlen($separator);

        return (string) \mb_substr($str, $from);
    }
7453
7454
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty or if the
     * separator was not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $position = self::strpos($str, $separator, 0, $encoding);
            if ($position === false) {
                return '';
            }

            return (string) self::substr($str, 0, $position, $encoding);
        }

        $position = \mb_strpos($str, $separator);
        if ($position === false) {
            return '';
        }

        // everything up to (but not including) the separator
        return (string) \mb_substr($str, 0, $position);
    }
7497
7498
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty or if the
     * separator was not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $position = self::strrpos($str, $separator, 0, $encoding);
            if ($position === false) {
                return '';
            }

            // the encoding is only normalized for the final substr call
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, 0, $position, $encoding);
        }

        $position = \mb_strrpos($str, $separator);
        if ($position === false) {
            return '';
        }

        // everything up to (but not including) the last separator
        return (string) \mb_substr($str, 0, $position);
    }
7540
7541
    /**
     * Gets the part of the string starting at the first occurrence of the
     * "$needle" (the needle itself is included), or - via "$beforeNeedle" -
     * the part before that occurrence.
     *
     * An empty string is returned if $str or $needle is empty or if the
     * needle was not found.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // "false" is the default of the third "mb_strstr()" argument, so
        // $beforeNeedle can be passed through in both cases
        $found = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $beforeNeedle)
            : self::strstr($str, $needle, $beforeNeedle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
7585
7586
    /**
     * Gets the part of the string starting at the last occurrence of the
     * "$needle" (the needle itself is included), or - via "$beforeNeedle" -
     * the part before that occurrence.
     *
     * An empty string is returned if $str or $needle is empty or if the
     * needle was not found.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // "false" is the default of the third "mb_strrchr()" argument, so
        // $beforeNeedle can be passed through in both cases
        $found = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $beforeNeedle)
            : self::strrchr($str, $needle, $beforeNeedle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
7630
7631
    /**
     * Wraps $str into the given substring.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @return string string with the substring both prepended and appended
     */
    public static function str_surround(string $str, string $substring): string
    {
        return \implode('', [$substring, $str, $substring]);
    }
7643
7644
    /**
     * Returns a string with the first letter of each word capitalized and
     * the rest of the word lower-cased. A "word" is every run of
     * non-whitespace characters.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * changed at all.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                [optional] <p>An array of words not to capitalize or null.
     *                                                   Default: null</p>
     * @param string              $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null         $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool                $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     * @param bool                $useTrimFirst          [optional] <p>true === trim the input string, first</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false,
        bool $useTrimFirst = true
    ): string {
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($useTrimFirst === true) {
            $str = \trim($str);
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // the plain "mb_*" functions are only used if no special handling was requested
        $useMbFunction = $lang === null && $tryToKeepStringLength === false;

        /**
         * @param string[] $match
         *
         * @return string
         */
        $titleizeWord = static function (array $match) use ($tryToKeepStringLength, $lang, $ignore, $useMbFunction, $encoding): string {
            $word = $match[0];

            // ignored words are kept as they are
            if ($ignore !== null && \in_array($word, $ignore, true)) {
                return $word;
            }

            if ($useMbFunction !== true) {
                $lowered = self::strtolower(
                    $word,
                    $encoding,
                    false,
                    $lang,
                    $tryToKeepStringLength
                );

                return self::ucfirst(
                    $lowered,
                    $encoding,
                    false,
                    $lang,
                    $tryToKeepStringLength
                );
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper(\mb_substr($word, 0, 1))
                       . \mb_strtolower(\mb_substr($word, 1));
            }

            return \mb_strtoupper(\mb_substr($word, 0, 1, $encoding), $encoding)
                   . \mb_strtolower(\mb_substr($word, 1, null, $encoding), $encoding);
        };

        return (string) \preg_replace_callback('/([\S]+)/u', $titleizeWord, $str);
    }
7719
7720
    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized. These words are appended to the built-in list of "small
     * words" and become part of the regular expressions without any quoting.
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
    {
        $smallWordsRx = \implode(
            '|',
            \array_merge(
                [
                    '(?<!q&)a',
                    'an',
                    'and',
                    'as',
                    'at(?!&t)',
                    'but',
                    'by',
                    'en',
                    'for',
                    'if',
                    'in',
                    'of',
                    'on',
                    'or',
                    'the',
                    'to',
                    'v[.]?',
                    'via',
                    'vs[.]?',
                ],
                $ignore
            )
        );
        $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

        /**
         * Re-assemble a word of the main substitution from its capture groups.
         *
         * @param string[] $matches
         *
         * @return string
         */
        $mainSubstitution = static function (array $matches) use ($encoding): string {
            if ($matches[2]) {
                // preserve URLs, domains, emails and file paths
                $word = $matches[2];
            } elseif ($matches[3]) {
                // lower-case small words
                $word = self::strtolower($matches[3], $encoding);
            } elseif ($matches[4]) {
                // capitalize word w/o internal caps
                $word = static::str_upper_first($matches[4], $encoding);
            } else {
                // preserve other kinds of word (iPhone)
                $word = $matches[5];
            }

            // preserve leading and trailing underscores
            return $matches[1] . $word . $matches[6];
        };

        /**
         * Capitalize the first capture group.
         *
         * @param string[] $matches
         *
         * @return string
         */
        $upperFirstGroup = static function (array $matches) use ($encoding): string {
            return static::str_upper_first($matches[1], $encoding);
        };

        /**
         * Keep the first capture group and capitalize the second one.
         *
         * @param string[] $matches
         *
         * @return string
         */
        $upperSecondGroup = static function (array $matches) use ($encoding): string {
            return $matches[1] . static::str_upper_first($matches[2], $encoding);
        };

        $str = \trim($str);

        // an input without any lower-case character is lower-cased first
        if (self::has_lowercase($str) === false) {
            $str = self::strtolower($str, $encoding);
        }

        // the main substitutions
        $str = (string) \preg_replace_callback(
            '~\b (_*) (?:                                                              # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \b                                                           # 6. With trailing underscore
                    ~ux',
            $mainSubstitution,
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \A [[:punct:]]*                # start of title...
                      |  [:.;?!][ ]+               # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )       # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \b # ...followed by small word
                     ~uxi',
            $upperSecondGroup,
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi',
            $upperFirstGroup,
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            $upperFirstGroup,
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        $str = (string) \preg_replace_callback(
            '~\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi',
            $upperSecondGroup,
            $str
        );

        return $str;
    }
7889
7890
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of the string are written as one big binary number, i.e.
     * leading zeros are not part of the result and an empty string (or a
     * string that only consists of NUL bytes) results in "0".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function str_to_binary(string $str): string
    {
        $bytes = \unpack('C*', $str);
        if ($bytes === false || $bytes === []) {
            return '0';
        }

        // Convert byte by byte: "base_convert()" on the whole hex string loses
        // precision as soon as the number does not fit into an integer anymore.
        $bits = '';
        foreach ($bytes as $byte) {
            $bits .= \sprintf('%08b', $byte);
        }

        // keep the output format of a number converted via "base_convert()"
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
7903
7904
    /**
     * Convert a string into an array of lines.
     *
     * The string is split at every run of one or two "\r" / "\n" characters,
     * so e.g. "\r\n" but also "\n\n" counts as a single line break.
     *
     * @param string   $str
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
    {
        if ($str === '') {
            return $removeEmptyValues === true ? [] : [''];
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split('[\r\n]{1,2}', $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        // the split failed: behave as for an empty input
        if ($lines === false) {
            return $removeEmptyValues === true ? [] : [''];
        }

        // only filter the lines if at least one of the filters was requested
        if ($removeEmptyValues !== false || $removeShortValues !== null) {
            return self::reduce_string_array(
                $lines,
                $removeEmptyValues,
                $removeShortValues
            );
        }

        return $lines;
    }
7942
7943
    /**
     * Convert a string into an array of words.
     *
     * The string is split with the "word" pattern as a captured delimiter, so
     * the unfiltered result alternates between non-word parts (even indexes)
     * and words (odd indexes).
     *
     * @param string   $str               <p>The input string.</p>
     * @param string   $charList          <p>Additional chars for the definition of "words".</p>
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $charList = '',
        bool $removeEmptyValues = false,
        int $removeShortValues = null
    ): array {
        if ($str === '') {
            return $removeEmptyValues === true ? [] : [''];
        }

        $charList = self::rxClass($charList, '\pL');

        $return = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($return === false) {
            return $removeEmptyValues === true ? [] : [''];
        }

        if (
            $removeShortValues === null
            &&
            $removeEmptyValues === false
        ) {
            return $return;
        }

        $tmpReturn = self::reduce_string_array(
            $return,
            $removeEmptyValues,
            $removeShortValues
        );

        // cast every item to string without iterating by reference, so that no
        // dangling reference is left behind in the returned array
        return \array_map(
            /**
             * @param mixed $item
             *
             * @return string
             */
            static function ($item): string {
                return (string) $item;
            },
            $tmpReturn
        );
    }
7990
7991
    /**
     * alias for "UTF8::to_ascii()"
     *
     * All arguments are forwarded unchanged.
     *
     * @see UTF8::to_ascii()
     *
     * @param string $str
     * @param string $unknown
     * @param bool   $strict
     *
     * @return string
     */
    public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
8006
8007
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * INFO: if no room is left for any character of $str (e.g. $substring is
     *       longer than $length, or $length is negative), only $substring is
     *       returned; this matches UTF8::str_truncate_safe().
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                $length -= (int) \mb_strlen($substring);
            }

            // a negative length would make "mb_substr()" cut from the end of the
            // string instead, which can return more characters than requested
            if ($length < 0) {
                $length = 0;
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length) . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            $length -= (int) self::strlen($substring, $encoding);
        }

        // same guard as in the UTF-8 branch above
        if ($length < 0) {
            $length = 0;
        }

        return (
            (string) self::substr(
                $str,
                0,
                $length,
                $encoding
            )
        ) . $substring;
    }
8064
8065
    /**
     * Truncates the string to a given length without splitting words.
     *
     * If $substring is provided and truncating occurs, the string is cut
     * further so that the substring can be appended without exceeding the
     * desired length. If the cut would land inside a word, the result is
     * shortened back to the last space before the cut.
     *
     * @param string $str
     * @param int    $length                          <p>Desired length of the truncated string.</p>
     * @param string $substring                       [optional] <p>The substring to append if it can fit. Default:
     *                                                ''</p>
     * @param string $encoding                        [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignoreDoNotSplitWordsForOneWord [optional] <p>Default: false</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignoreDoNotSplitWordsForOneWord = false
    ): string {
        if ($str === '' || $length <= 0) {
            return $substring;
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            if ($length >= (int) self::strlen($str, $encoding)) {
                return $str;
            }

            // reserve room for the substring that gets appended
            $length -= (int) self::strlen($substring, $encoding);
            if ($length <= 0) {
                return $substring;
            }

            $head = self::substr($str, 0, $length, $encoding);
            if ($head === false) {
                return '';
            }

            // a space exactly at the cut position means that no word was split
            $nextSpace = self::strpos($str, ' ', $length - 1, $encoding);
            if ($nextSpace === $length) {
                return $head . $substring;
            }

            // otherwise fall back to the last space inside the truncated part
            $lastSpace = self::strrpos($head, ' ', 0, $encoding);
            $cutBack = $lastSpace !== false
                       ||
                       ($nextSpace !== false && $ignoreDoNotSplitWordsForOneWord === false);
            if ($cutBack) {
                $head = (string) self::substr($head, 0, (int) $lastSpace, $encoding);
            }

            return $head . $substring;
        }

        if ($length >= (int) \mb_strlen($str)) {
            return $str;
        }

        // reserve room for the substring that gets appended
        $length -= (int) \mb_strlen($substring);
        if ($length <= 0) {
            return $substring;
        }

        $head = \mb_substr($str, 0, $length);
        if ($head === false) {
            return '';
        }

        // a space exactly at the cut position means that no word was split
        $nextSpace = \mb_strpos($str, ' ', $length - 1);
        if ($nextSpace === $length) {
            return $head . $substring;
        }

        // otherwise fall back to the last space inside the truncated part
        $lastSpace = \mb_strrpos($head, ' ', 0);
        $cutBack = $lastSpace !== false
                   ||
                   ($nextSpace !== false && $ignoreDoNotSplitWordsForOneWord === false);
        if ($cutBack) {
            $head = (string) \mb_substr($head, 0, (int) $lastSpace);
        }

        return $head . $substring;
    }
8159
8160
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * INFO: this is UTF8::str_delimit() with "_" as delimiter.
     *
     * @param string $str
     *
     * @return string the underscored string
     */
    public static function str_underscored(string $str): string
    {
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
8174
8175
    /**
     * Returns an UpperCamelCase version of the supplied string. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * INFO: the string is camelized via UTF8::str_camelize() first, then its
     *       first character is upper-cased via UTF8::ucfirst().
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string in UpperCamelCase
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst(
            $camelized,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );
    }
8197
8198
    /**
     * alias for "UTF8::ucfirst()"
     *
     * All arguments are forwarded unchanged.
     *
     * @see UTF8::ucfirst()
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     */
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $upperFirst = self::ucfirst(
            $str,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );

        return $upperFirst;
    }
8220
8221
    /**
     * Counts number of words in the UTF-8 string.
     *
     * INFO: the words are taken from UTF8::str_to_words(), where every second
     *       part (odd index) is a word and the parts in between are separators.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $format   [optional] <p>
     *                         <strong>0</strong> => return a number of words (default)<br>
     *                         <strong>1</strong> => return an array of words<br>
     *                         <strong>2</strong> => return an array of words with word-offset as key
     *                         </p>
     * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
     *
     * @return int|string[] The number of words in the string
     */
    public static function str_word_count(string $str, int $format = 0, string $charlist = '')
    {
        $parts = self::str_to_words($str, $charlist);
        $total = \count($parts);

        // default: only count the words
        if ($format !== 1 && $format !== 2) {
            return (int) (($total - 1) / 2);
        }

        $words = [];

        if ($format === 1) {
            for ($idx = 1; $idx < $total; $idx += 2) {
                $words[] = $parts[$idx];
            }

            return $words;
        }

        // $format === 2: key every word by its character offset in $str
        $position = (int) self::strlen($parts[0]);
        $idx = 1;
        while ($idx < $total) {
            $words[$position] = $parts[$idx];
            $position += (int) self::strlen($parts[$idx]) + (int) self::strlen($parts[$idx + 1]);
            $idx += 2;
        }

        return $words;
    }
8258
8259
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp(); both strings are
     *       case-folded via UTF8::strtocasefold() before they are compared.
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
8280
8281
    /**
     * alias for "UTF8::strstr()"
     *
     * All arguments are forwarded unchanged.
     *
     * @see UTF8::strstr()
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $cleanUtf8
     *
     * @return false|string
     */
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        $found = self::strstr(
            $haystack,
            $needle,
            $before_needle,
            $encoding,
            $cleanUtf8
        );

        return $found;
    }
8303
8304
    /**
     * Case-sensitive string comparison.
     *
     * Identical strings compare as equal right away. Otherwise both strings
     * are normalized to NFD before the byte-wise comparison, so that composed
     * and decomposed forms of the same text compare as equal.
     *
     * INFO: "Normalizer::normalize()" returns false on failure; in that case
     *       the original string is compared instead of the failure value.
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        return \strcmp(
            \is_string($normalized1) ? $normalized1 : $str1,
            \is_string($normalized2) ? $normalized2 : $str2
        );
    }
8322
8323
    /**
     * Find length of initial segment not matching mask.
     *
     * If $offset and / or $length is given, only that part of $str is
     * examined. An empty $charList yields the length of the whole string.
     *
     * @param string   $str
     * @param string   $charList
     * @param int|null $offset
     * @param int|null $length
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     */
    public static function strcspn(
        string $str,
        string $charList,
        int $offset = null,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($charList === '') {
            return (int) self::strlen($str, $encoding);
        }

        if ($offset !== null || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $slice = self::substr($str, (int) $offset, $length, $encoding);
            } elseif ($length === null) {
                $slice = \mb_substr($str, (int) $offset);
            } else {
                $slice = \mb_substr($str, (int) $offset, $length);
            }

            if ($slice === false) {
                return 0;
            }

            $str = $slice;
        }

        if ($str === '') {
            return 0;
        }

        // capture (lazily) everything in front of the first char from the mask
        $found = [];
        $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
        if (!\preg_match($pattern, $str, $found)) {
            // no char from the mask found: the whole string is the segment
            return (int) self::strlen($str, $encoding);
        }

        $segmentLength = self::strlen($found[1], $encoding);

        return $segmentLength === false ? 0 : $segmentLength;
    }
8386
8387
    /**
     * alias for "UTF8::stristr()"
     *
     * All arguments are forwarded unchanged.
     *
     * @see UTF8::stristr()
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $cleanUtf8
     *
     * @return false|string
     */
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        $found = self::stristr(
            $haystack,
            $needle,
            $before_needle,
            $encoding,
            $cleanUtf8
        );

        return $found;
    }
8409
8410
    /**
     * Create a UTF-8 string from code points.
     *
     * Every array item is converted via UTF8::chr() and the resulting
     * characters are joined in array order.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * @param array $array <p>Integer or Hexadecimal codepoints.</p>
     *
     * @return string UTF-8 encoded string
     */
    public static function string(array $array): string
    {
        $chars = \array_map([self::class, 'chr'], $array);

        return \implode('', $chars);
    }
8432
8433
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * The string is compared against every BOM byte sequence that is used as
     * a key in self::$BOM.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     */
    public static function string_has_bom(string $str): bool
    {
        // only the keys are needed, so iterate by value: a by-reference loop over
        // the shared static array would leave a dangling reference behind
        foreach (\array_keys(self::$BOM) as $bomString) {
            // PHP converts integer-like array keys to int, so cast back to string
            if (\strpos($str, (string) $bomString) === 0) {
                return true;
            }
        }

        return false;
    }
8453
8454
    /**
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string      $str            <p>
     *                                    The input string.
     *                                    </p>
     * @param string|null $allowable_tags [optional] <p>
     *                                    You can use the optional second parameter to specify tags which should
     *                                    not be stripped.
     *                                    </p>
     *                                    <p>
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
     *                                    can not be changed with allowable_tags.
     *                                    </p>
     * @param bool        $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string the stripped string
     */
    public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        $input = $cleanUtf8 === true ? self::clean($str) : $str;

        return $allowable_tags === null
            ? \strip_tags($input)
            : \strip_tags($input, $allowable_tags);
    }
8490
8491
    /**
     * Strip all whitespace characters. This includes tabs and newline
     * characters, as well as multibyte whitespace such as the thin space
     * and ideographic space.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the string without any whitespace
     */
    public static function strip_whitespace(string $str): string
    {
        return $str === ''
            ? ''
            : (string) \preg_replace('/[[:space:]]+/u', '', $str);
    }
8508
8509
    /**
     * Finds position of first occurrence of a string within another, case insensitive.
     *
     * Uses "mb_stripos()" if mbstring is available; otherwise it falls back to
     * "grapheme_stripos()" (UTF-8 and non-negative offset only), then to
     * "stripos()" for pure ASCII input and finally to a case-folded search
     * via UTF8::strpos().
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle    <p>The string to find in haystack.</p>
     * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stripos($haystack, $needle, $offset);
            }

            return \mb_stripos(
                $haystack,
                $needle,
                $offset,
                self::normalize_encoding($encoding, 'UTF-8')
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        //
        // fallback via intl
        //

        // INFO: "grapheme_stripos()" can't handle other encodings or a negative offset
        $graphemeUsable = $encoding === 'UTF-8'
                          &&
                          $offset >= 0
                          &&
                          self::$SUPPORT['intl'] === true;
        if ($graphemeUsable) {
            $graphemePos = \grapheme_stripos($haystack, $needle, $offset);
            if ($graphemePos !== false) {
                return $graphemePos;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $foldedNeedle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($foldedHaystack, $foldedNeedle, $offset, $encoding);
    }
8584
8585
    /**
     * Returns all of haystack starting from and including the first occurrence of needle to the end.
     *
     * The search is case-insensitive. Uses "mb_stristr()" if mbstring is
     * available; otherwise it falls back to "grapheme_stristr()" (UTF-8 only),
     * then to "stristr()" for pure ASCII input and finally to a regex search.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string a sub-string,<br>or <strong>false</strong> if needle is not found
     */
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // the needle can only be empty here if the cleaning removed everything;
        // compare strictly, because the valid needle "0" is falsy in PHP
        if ($needle === '') {
            return $haystack;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php: capture (lazily) everything in front of the needle
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
    }
8668
8669
    /**
     * Get the string length, not the byte-length!
     *
     * Uses "mb_strlen()" if mbstring is available; otherwise it falls back to
     * "strlen()" for CP850 / ASCII, then to "iconv_strlen()", then to
     * "grapheme_strlen()" (UTF-8 only), then to "strlen()" for pure ASCII
     * input and finally to counting the matches of a regex.
     *
     * @see     http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str       <p>The string being checked for length.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
     *                   $encoding.
     *                   (One multi-byte character counted as +1).
     *                   <br>
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
     *                   chars.
     */
    public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strlen($str)
                : \mb_strlen($str, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strlen($str);
        }

        $isUtf8 = $encoding === 'UTF-8';

        if (
            !$isUtf8
            &&
            self::$SUPPORT['mbstring'] === false
            &&
            self::$SUPPORT['iconv'] === false
        ) {
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconvLength = \iconv_strlen($str, $encoding);
            if ($iconvLength !== false) {
                return $iconvLength;
            }
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strlen()" can't handle other encodings
        if ($isUtf8 && self::$SUPPORT['intl'] === true) {
            $graphemeLength = \grapheme_strlen($str);
            if ($graphemeLength !== null) {
                return $graphemeLength;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php: one regex match per character
        //

        \preg_match_all('/./us', $str, $chars);
        $charCount = \count($chars[0]);

        return $charCount === 0 ? false : $charCount;
    }
8783
8784
    /**
     * Get the length of a string in bytes (not in characters).
     *
     * @param string $str <p>The input string.</p>
     *
     * @return int <p>The number of bytes, 0 for an empty string.</p>
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // The "mb_" functions are available whenever function overloading is active,
        // so count with an 8-bit encoding ("CP850") in that case.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850')
            : \strlen($str);
    }
8804
8805
    /**
     * Case-insensitive "natural order" comparison of two strings.
     *
     * INFO: both strings are case-folded via UTF8::strtocasefold() and then
     *       handed over to UTF8::strnatcmp()
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
8826
8827
    /**
     * "Natural order" comparison of two strings.
     *
     * INFO: natural order version of UTF8::strcmp(); both strings are passed
     *       through UTF8::strtonatfold() before the native comparison
     *
     * @see  http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        // identical strings need no folding at all
        if ($str1 === $str2) {
            return 0;
        }

        $left = (string) self::strtonatfold($str1);
        $right = (string) self::strtonatfold($str2);

        return \strnatcmp($left, $right);
    }
8846
8847
    /**
     * Case-insensitive comparison of the first n characters of two strings.
     *
     * INFO: both strings are case-folded via UTF8::strtocasefold() and then
     *       handed over to UTF8::strncmp()
     *
     * @see  http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strncmp($folded1, $folded2, $len);
    }
8874
8875
    /**
     * Comparison of the first n characters of two strings.
     *
     * Both strings are cut to the first $len characters and the two prefixes
     * are then compared via UTF8::strcmp().
     *
     * @see  http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // the two most common encodings do not need to be normalized
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($encoding !== 'UTF-8') {
            $prefix1 = (string) self::substr($str1, 0, $len, $encoding);
            $prefix2 = (string) self::substr($str2, 0, $len, $encoding);

            return self::strcmp($prefix1, $prefix2);
        }

        // UTF-8: use "mb_substr()" directly
        $prefix1 = (string) \mb_substr($str1, 0, $len);
        $prefix2 = (string) \mb_substr($str2, 0, $len);

        return self::strcmp($prefix1, $prefix2);
    }
8910
8911
    /**
     * Search a string for any character out of a set of characters.
     *
     * @see  http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>The characters to look for. This parameter is case sensitive.</p>
     *
     * @return false|string the rest of the haystack, starting from the first character found,
     *                      or false if none is found (or if one of the inputs is empty)
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($char_list === '' || $haystack === '') {
            return false;
        }

        // build a character class out of the list and look for its first hit
        $pattern = '/' . self::rxClass($char_list) . '/us';
        $matches = [];
        if (!\preg_match($pattern, $haystack, $matches)) {
            return false;
        }

        // byte position of the matched character inside the haystack
        $bytePos = (int) \strpos($haystack, $matches[0]);

        return \substr($haystack, $bytePos);
    }
8933
8934
    /**
     * Find position of first occurrence of string in a string.
     *
     * The lookup is delegated to the first available backend, in this order:
     * mbstring, native byte functions (for "CP850" / "ASCII"), intl, iconv,
     * native byte functions (for pure ASCII input) and finally a pure PHP fallback.
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.
     *                              A negative offset is counted from the end of the haystack.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string (always relative to the beginning of the haystack).<br>
     *                   If needle is not found or if haystack / needle is empty it returns false.
     */
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strpos($haystack, $needle, $offset);
            }

            return \mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // cut off everything in front of the offset and search only in the rest
        $tail = self::substr($haystack, $offset, null, $encoding);
        if ($tail === false) {
            $tail = '';
        }
        $tail = (string) $tail;

        if ($offset < 0) {
            // A negative offset is counted from the end of the haystack, so it has to be
            // translated into an absolute character position, otherwise the result would be
            // relative to the tail instead of the whole haystack.
            $offset += (int) self::strlen($haystack, $encoding);
            if ($offset < 0) {
                // the offset points in front of the string: the tail is the whole haystack
                $offset = 0;
            }
        }

        $pos = \strpos($tail, $needle);
        if ($pos === false) {
            return false;
        }

        if ($pos) {
            // convert the byte position into a character position
            return $offset + (int) self::strlen(\substr($tail, 0, $pos), $encoding);
        }

        return $offset;
    }
9081
9082
    /**
     * Find the byte position of the first occurrence of a string in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @return false|int The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found or if haystack / needle
     *                   is empty, it returns false.
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // The "mb_" functions are available whenever function overloading is active,
        // so search with an 8-bit encoding ("CP850") in that case.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strpos($haystack, $needle, $offset, 'CP850')
            : \strpos($haystack, $needle, $offset);
    }
9111
9112
    /**
     * Finds the last occurrence of a character in a string within another.
     *
     * INFO: without mbstring only the first character of the needle is used for the lookup
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or false if needle is not found
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        $isSingleByteEncoding = ($encoding === 'CP850' || $encoding === 'ASCII');
        if ($before_needle === false && $isSingleByteEncoding) {
            return \strrchr($haystack, $needle);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv || vanilla php
        //

        // only the first character of the needle is searched for
        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        // position of the last occurrence: via iconv if available, otherwise via UTF8::strrpos()
        if (self::$SUPPORT['iconv'] === true) {
            $pos = \iconv_strrpos($haystack, $needle, $encoding);
        } else {
            $pos = self::strrpos($haystack, $needle, 0, $encoding);
        }

        if ($pos === false) {
            return false;
        }

        if ($before_needle) {
            return self::substr($haystack, 0, $pos, $encoding);
        }

        return self::substr($haystack, $pos, null, $encoding);
    }
9234
9235
    /**
     * Reverses characters order in the string.
     *
     * The string is emoji-encoded before and emoji-decoded after the reversal.
     * For UTF-8 the string is walked from the end to the beginning in grapheme units
     * (if intl is available) or otherwise in "mb_" characters; for other encodings
     * UTF8::strlen() and UTF8::substr() are used.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the string with characters in the reverse sequence
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::emoji_encode($str, true);
        $result = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            for ($index = (int) self::strlen($str, $encoding) - 1; $index >= 0; --$index) {
                $char = self::substr($str, $index, 1, $encoding);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($index = (int) \grapheme_strlen($str) - 1; $index >= 0; --$index) {
                $char = \grapheme_substr($str, $index, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } else {
            for ($index = (int) \mb_strlen($str) - 1; $index >= 0; --$index) {
                $char = \mb_substr($str, $index, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        }

        return self::emoji_decode($result, true);
    }
9287
9288
    /**
     * Finds the last occurrence of a character in a string within another, case insensitive.
     *
     * INFO: without mbstring only the first character of the needle is used for the lookup
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or<br>false if needle is not found
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // only the first character of the needle is searched for
        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        $lastPos = self::strripos($haystack, $needle, 0, $encoding);
        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
9363
9364
    /**
     * Find position of last occurrence of a case-insensitive string.
     *
     * The lookup is delegated to the first available backend, in this order:
     * mbstring, native byte functions (for "CP850" / "ASCII"), intl,
     * native byte functions (for pure ASCII input) and finally
     * UTF8::strrpos() on the case-folded strings.
     *
     * @param string     $haystack  <p>The string to look in.</p>
     * @param int|string $needle    <p>The string to look for.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strripos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strripos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $graphemePos = \grapheme_strripos($haystack, $needle, $offset);
            if ($graphemePos !== false) {
                return $graphemePos;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // case-fold both strings and run the case-sensitive lookup on them
        return self::strrpos(
            self::strtocasefold($haystack, true, false, $encoding),
            self::strtocasefold($needle, true, false, $encoding),
            $offset,
            $encoding,
            $cleanUtf8
        );
    }
9474
9475
    /**
     * Finds the byte position of the last occurrence of a string within another, case insensitive.
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @return false|int return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found or if
     *                   haystack / needle is empty
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // The "mb_" functions are available whenever function overloading is active,
        // so search with an 8-bit encoding ("CP850") in that case.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strripos($haystack, $needle, $offset, 'CP850')
            : \strripos($haystack, $needle, $offset);
    }
9506
9507
    /**
     * Find position of last occurrence of a string in a string.
     *
     * The lookup is delegated to the first available backend, in this order:
     * mbstring, native byte functions (for "CP850" / "ASCII"), intl,
     * native byte functions (for pure ASCII input) and finally a pure PHP fallback.
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                              into the string. Negative values will stop searching at an arbitrary point prior to
     *                              the end of the string.
     *                              </p>
     * @param string     $encoding  [optional] <p>Set the charset.</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found or if haystack / needle is empty, it returns false.
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strrpos($haystack, $needle, $offset);
            }

            return \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strrpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $offset >= 0 // grapheme_strrpos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Restrict the haystack to the part that has to be searched; the requested
        // encoding is passed on, so that the characters are counted in that encoding.
        $haystackTmp = null;
        if ($offset > 0) {
            // skip the first $offset characters
            $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        } elseif ($offset < 0) {
            // drop the last characters; the result stays relative to the beginning
            $haystackTmp = self::substr($haystack, 0, $offset, $encoding);
            $offset = 0;
        }

        if ($haystackTmp !== null) {
            if ($haystackTmp === false) {
                $haystackTmp = '';
            }
            $haystack = (string) $haystackTmp;
        }

        $pos = \strrpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        $strTmp = \substr($haystack, 0, $pos);
        if ($strTmp === false) {
            return false;
        }

        // convert the byte position into a character position of the given encoding
        return $offset + (int) self::strlen($strTmp, $encoding);
    }
9644
9645
    /**
9646
     * Find position of last occurrence of a string in a string.
9647
     *
9648
     * @param string $haystack <p>
9649
     *                         The string being checked, for the last occurrence
9650
     *                         of needle.
9651
     *                         </p>
9652
     * @param string $needle   <p>
9653
     *                         The string to find in haystack.
9654
     *                         </p>
9655
     * @param int    $offset   [optional] May be specified to begin searching an arbitrary number of characters into
9656
     *                         the string. Negative values will stop searching at an arbitrary point
9657
     *                         prior to the end of the string.
9658
     *
9659
     * @return false|int The numeric position of the last occurrence of needle in the
9660
     *                   haystack string. If needle is not found, it returns false.
9661
     */
9662
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // Nothing can be found in an empty haystack, and an empty needle is never searched for.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // The "mb_" functions are available whenever the function overload is in use;
        // the 8-bit "CP850" charset is passed in that case (presumably so that
        // positions are still counted per byte - confirm against mbstring docs).
        $overloadActive = self::$SUPPORT['mbstring_func_overload'] === true;

        return $overloadActive
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
            : \strrpos($haystack, $needle, $offset);
    }
9675
9676
    /**
9677
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
9678
     * mask.
9679
     *
9680
     * @param string $str      <p>The input string.</p>
9681
     * @param string $mask     <p>The mask of chars</p>
9682
     * @param int    $offset   [optional]
9683
     * @param int    $length   [optional]
9684
     * @param string $encoding [optional] <p>Set the charset.</p>
9685
     *
9686
     * @return false|int
9687
     */
9688 10
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        // "UTF-8" and "CP850" are used as-is; every other charset name is normalized first.
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Cut the subject down to the requested window before matching.
        $hasWindow = $offset || $length !== null;
        if ($hasWindow && $encoding !== 'UTF-8') {
            $str = (string) self::substr($str, $offset, $length, $encoding);
        } elseif ($hasWindow) {
            $str = $length === null
                ? (string) \mb_substr($str, $offset)
                : (string) \mb_substr($str, $offset, $length);
        }

        // An empty subject or an empty mask can never produce a matching segment.
        if ($mask === '' || $str === '') {
            return 0;
        }

        // Match the leading run of mask characters and measure it in characters.
        $found = [];
        $pattern = '/^' . self::rxClass($mask) . '+/u';
        if (!\preg_match($pattern, $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
9719
9720
    /**
9721
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
9722
     *
9723
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
9724
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
9725
     * @param bool   $before_needle [optional] <p>
9726
     *                              If <b>TRUE</b>, strstr() returns the part of the
9727
     *                              haystack before the first occurrence of the needle (excluding the needle).
9728
     *                              </p>
9729
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
9730
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
9731
     *
9732
     * @return false|string
9733
     *                      A sub-string,<br>or <strong>false</strong> if needle is not found
9734
     */
9735 3
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        $cleanUtf8 = false
    ) {
        // An empty haystack or needle can never match.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // Optionally strip invalid byte sequences from both inputs before searching.
        if ($cleanUtf8 === true) {
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $isUtf8 = $encoding === 'UTF-8';

        // 1) mbstring: the encoding argument is only passed for non UTF-8 charsets.
        if (self::$SUPPORT['mbstring'] === true) {
            return $isUtf8
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        // 2) binary / ascii charsets: the native byte-wise function is used.
        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        // Any other non UTF-8 charset cannot be handled without mbstring: warn, then continue.
        if (!$isUtf8 && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // 3) intl: "grapheme_strstr()" can't handle other encodings than UTF-8.
        //    A false result is not returned here; the remaining fallbacks are tried instead.
        if ($isUtf8 && self::$SUPPORT['intl'] === true) {
            $viaIntl = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($viaIntl !== false) {
                return $viaIntl;
            }
        }

        // 4) pure ascii input: the native function is used.
        if (self::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        // 5) vanilla php: lazily capture everything in front of the first needle occurrence.
        $prefix = [];
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $prefix);
        if (!isset($prefix[1])) {
            return false;
        }

        return $before_needle
            ? $prefix[1]
            : self::substr($haystack, (int) self::strlen($prefix[1]));
    }
9828
9829
    /**
9830
     * Finds first occurrence of a string within another.
9831
     *
9832
     * @param string $haystack      <p>
9833
     *                              The string from which to get the first occurrence
9834
     *                              of needle.
9835
     *                              </p>
9836
     * @param string $needle        <p>
9837
     *                              The string to find in haystack.
9838
     *                              </p>
9839
     * @param bool   $before_needle [optional] <p>
9840
     *                              Determines which portion of haystack
9841
     *                              this function returns.
9842
     *                              If set to true, it returns all of haystack
9843
     *                              from the beginning to the first occurrence of needle.
9844
     *                              If set to false, it returns all of haystack
9845
     *                              from the first occurrence of needle to the end,
9846
     *                              </p>
9847
     *
9848
     * @return false|string the portion of haystack,
9849
     *                      or false if needle is not found
9850
     */
9851
    public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
    {
        // Nothing can be found in an empty haystack, and an empty needle is never searched for.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // The "mb_" functions are available whenever the function overload is in use;
        // the 8-bit "CP850" charset is passed in that case (presumably so that
        // the search still works per byte - confirm against mbstring docs).
        $overloadActive = self::$SUPPORT['mbstring_func_overload'] === true;

        return $overloadActive
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
            : \strstr($haystack, $needle, $before_needle);
    }
9864
9865
    /**
9866
     * Unicode transformation for case-less matching.
9867
     *
9868
     * @see http://unicode.org/reports/tr21/tr21-5.html
9869
     *
9870
     * @param string      $str       <p>The input string.</p>
9871
     * @param bool        $full      [optional] <p>
9872
     *                               <b>true</b>, replace full case folding chars (default)<br>
9873
     *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
9874
     *                               </p>
9875
     * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9876
     * @param string      $encoding  [optional] <p>Set the charset.</p>
9877
     * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
9878
     * @param bool        $lower     [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
9879
     *                               is for some languages better ...</p>
9880
     *
9881
     * @return string
9882
     */
9883 32
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $cleanUtf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        // Optionally strip invalid byte sequences before any case mapping is applied.
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // Apply the case-folding replacements first, then convert the case of the whole string.
        $str = self::fixStrCaseHelper($str, $lower, $full);

        $toLower = $lower === true;

        // Fast path: plain UTF-8 without language specific rules.
        if ($lang === null && $encoding === 'UTF-8') {
            return $toLower ? \mb_strtolower($str) : \mb_strtoupper($str);
        }

        // Otherwise delegate to the encoding / language aware helpers of this class.
        return $toLower
            ? self::strtolower($str, $encoding, $cleanUtf8, $lang)
            : self::strtoupper($str, $encoding, $cleanUtf8, $lang);
    }
9917
9918
    /**
9919
     * Make a string lowercase.
9920
     *
9921
     * @see http://php.net/manual/en/function.mb-strtolower.php
9922
     *
9923
     * @param string      $str                   <p>The string being lowercased.</p>
9924
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
9925
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
9926
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
9927
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
9928
     *
9929
     * @return string
9930
     *                <p>String with all alphabetic characters converted to lowercase.</p>
9931
     */
9932 73
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // The input is accepted untyped, so it is cast to string once up front.
        $str = (string) $str;
        if ($str === '') {
            return '';
        }

        // Optionally strip invalid byte sequences before the case conversion.
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, true);
        }

        $hasLang = $lang !== null;

        // Fast path: plain UTF-8 without language specific rules.
        if (!$hasLang && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // No language requested: convert with the normalized encoding.
        if (!$hasLang) {
            return \mb_strtolower($str, $encoding);
        }

        // A language was requested but intl is missing: warn and use the generic conversion.
        if (self::$SUPPORT['intl'] !== true) {
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            return \mb_strtolower($str, $encoding);
        }

        // Load the list of known transliterator ids only once.
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        // Use "<lang>-Lower" when it is listed, otherwise warn and fall back to "Any-Lower".
        $transliteratorId = $lang . '-Lower';
        if (!\in_array($transliteratorId, self::$INTL_TRANSLITERATOR_LIST, true)) {
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, \E_USER_WARNING);

            $transliteratorId = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        return (string) \transliterator_transliterate($transliteratorId, $str);
    }
9987
9988
    /**
9989
     * Make a string uppercase.
9990
     *
9991
     * @see http://php.net/manual/en/function.mb-strtoupper.php
9992
     *
9993
     * @param string      $str                   <p>The string being uppercased.</p>
9994
     * @param string      $encoding              [optional] <p>Set the charset.</p>
9995
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
9996
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
9997
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
9998
     *
9999
     * @return string
10000
     *                <p>String with all alphabetic characters converted to uppercase.</p>
10001
     */
10002 17
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // The input is accepted untyped, so it is cast to string once up front.
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // Strip invalid byte sequences before the case conversion.
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, false);
        }

        // Fast path: plain UTF-8 without language specific rules.
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // Load the list of known transliterator ids only once.
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                // Use "<lang>-Upper" when it is listed, otherwise warn and fall back to "Any-Upper".
                $langCode = $lang . '-Upper';
                if (!\in_array($langCode, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl IS available in this branch, so the warning must not claim otherwise.
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, \E_USER_WARNING);

                    $langCode = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($langCode, $str);
            }

            // A language was requested but intl is missing: warn (naming THIS method)
            // and continue with the generic conversion below.
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang"-parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
10057
10058
    /**
10059
     * Translate characters or replace sub-strings.
10060
     *
10061
     * @see  http://php.net/manual/en/function.strtr.php
10062
     *
10063
     * @param string          $str  <p>The string being translated.</p>
10064
     * @param string|string[] $from <p>The string replacing from.</p>
10065
     * @param string|string[] $to   [optional] <p>The string being translated to.</p>
10066
     *
10067
     * @return string
10068
     *                This function returns a copy of str, translating all occurrences of each character in from to the
10069
     *                corresponding character in to
10070
     */
10071 2
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // Identical "from" and "to" cannot change anything.
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            // Split both sides into characters and build a "from char => to char" map.
            $from = self::str_split($from);
            $to = self::str_split($to);
            $countFrom = \count($from);
            $countTo = \count($to);

            // Trim the longer list so that both sides have the same number of entries.
            if ($countFrom > $countTo) {
                $from = \array_slice($from, 0, $countTo);
            } elseif ($countFrom < $countTo) {
                $to = \array_slice($to, 0, $countFrom);
            }

            // Keep the result in its own variable: the failure message below must still
            // be able to print the original "from" list instead of the boolean false.
            $pairs = \array_combine($from, $to);
            if ($pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from, true) . ' | to: ' . \print_r($to, true) . ')');
            }

            $from = $pairs;
        }

        // A plain string "from" is only left when "to" is empty: remove its occurrences.
        if (\is_string($from)) {
            return \str_replace($from, '', $str);
        }

        // Otherwise "from" is a replacement map that the native function can apply.
        return \strtr($str, $from);
    }
10105
10106
    /**
10107
     * Return the width of a string.
10108
     *
10109
     * @param string $str       <p>The input string.</p>
10110
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
10111
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10112
     *
10113
     * @return int
10114
     */
10115 2
    public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
    {
        if ($str === '') {
            return 0;
        }

        // "UTF-8" and "CP850" are used as-is; every other charset name is normalized first.
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // iconv and mbstring are not tolerant to invalid encoding,
        // so invalid byte sequences are optionally removed first.
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        $isUtf8 = $encoding === 'UTF-8';

        // 1) mbstring: the encoding argument is only passed for non UTF-8 charsets.
        if (self::$SUPPORT['mbstring'] === true) {
            return $isUtf8
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        // 2) vanilla php: the regex below works on UTF-8, so convert other charsets first.
        if (!$isUtf8) {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // Remove every character from the listed code point ranges and count the removals;
        // each removed character contributes two columns, each remaining character one.
        $wideCount = 0;
        $narrowPart = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wideCount);

        $narrowCount = (int) self::strlen($narrowPart, 'UTF-8');

        return ($wideCount * 2) + $narrowCount;
    }
10156
10157
    /**
10158
     * Get part of a string.
10159
     *
10160
     * @see http://php.net/manual/en/function.mb-substr.php
10161
     *
10162
     * @param string $str       <p>The string being checked.</p>
10163
     * @param int    $offset    <p>The first position used in str.</p>
10164
     * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
10165
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
10166
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10167
     *
10168
     * @return false|string
10169
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
10170
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
10171
     *                      characters long, <b>FALSE</b> will be returned.
10172
     */
10173 172
    public static function substr(
10174
        string $str,
10175
        int $offset = 0,
10176
        int $length = null,
10177
        string $encoding = 'UTF-8',
10178
        bool $cleanUtf8 = false
10179
    ) {
10180
        // empty string
10181 172
        if ($str === '' || $length === 0) {
10182 8
            return '';
10183
        }
10184
10185 168
        if ($cleanUtf8 === true) {
10186
            // iconv and mbstring are not tolerant to invalid encoding
10187
            // further, their behaviour is inconsistent with that of PHP's substr
10188 2
            $str = self::clean($str);
10189
        }
10190
10191
        // whole string
10192 168
        if (!$offset && $length === null) {
10193 7
            return $str;
10194
        }
10195
10196 163
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
10197 19
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
10198
        }
10199
10200
        //
10201
        // fallback via mbstring
10202
        //
10203
10204 163
        if (self::$SUPPORT['mbstring'] === true) {
10205 161
            if ($encoding === 'UTF-8') {
10206 161
                if ($length === null) {
10207 64
                    return \mb_substr($str, $offset);
10208
                }
10209
10210 102
                return \mb_substr($str, $offset, $length);
10211
            }
10212
10213
            return self::substr($str, $offset, $length, $encoding);
10214
        }
10215
10216
        //
10217
        // fallback for binary || ascii only
10218
        //
10219
10220
        if (
10221 4
            $encoding === 'CP850'
10222
            ||
10223 4
            $encoding === 'ASCII'
10224
        ) {
10225
            if ($length === null) {
10226
                return \substr($str, $offset);
10227
            }
10228
10229
            return \substr($str, $offset, $length);
10230
        }
10231
10232
        // otherwise we need the string-length
10233 4
        $str_length = 0;
10234 4
        if ($offset || $length === null) {
10235 4
            $str_length = self::strlen($str, $encoding);
10236
        }
10237
10238
        // e.g.: invalid chars + mbstring not installed
10239 4
        if ($str_length === false) {
10240
            return false;
10241
        }
10242
10243
        // empty string
10244 4
        if ($offset === $str_length && !$length) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $length of type integer|null is loosely compared to false; this is ambiguous if the integer can be 0. You might want to explicitly use === null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
10245
            return '';
10246
        }
10247
10248
        // impossible
10249 4
        if ($offset && $offset > $str_length) {
10250
            return '';
10251
        }
10252
10253 4
        if ($length === null) {
10254 4
            $length = (int) $str_length;
10255
        } else {
10256 2
            $length = (int) $length;
10257
        }
10258
10259
        if (
10260 4
            $encoding !== 'UTF-8'
10261
            &&
10262 4
            self::$SUPPORT['mbstring'] === false
10263
        ) {
10264 2
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
10265
        }
10266
10267
        //
10268
        // fallback via intl
10269
        //
10270
10271
        if (
10272 4
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
10273
            &&
10274 4
            $offset >= 0 // grapheme_substr() can't handle negative offset
10275
            &&
10276 4
            self::$SUPPORT['intl'] === true
10277
        ) {
10278
            $returnTmp = \grapheme_substr($str, $offset, $length);
10279
            if ($returnTmp !== false) {
10280
                return $returnTmp;
10281
            }
10282
        }
10283
10284
        //
10285
        // fallback via iconv
10286
        //
10287
10288
        if (
10289 4
            $length >= 0 // "iconv_substr()" can't handle negative length
10290
            &&
10291 4
            self::$SUPPORT['iconv'] === true
10292
        ) {
10293
            $returnTmp = \iconv_substr($str, $offset, $length);
10294
            if ($returnTmp !== false) {
10295
                return $returnTmp;
10296
            }
10297
        }
10298
10299
        //
10300
        // fallback for ascii only
10301
        //
10302
10303 4
        if (self::is_ascii($str)) {
10304
            return \substr($str, $offset, $length);
10305
        }
10306
10307
        //
10308
        // fallback via vanilla php
10309
        //
10310
10311
        // split to array, and remove invalid characters
10312 4
        $array = self::str_split($str);
10313
10314
        // extract relevant part, and join to make sting again
10315 4
        return \implode('', \array_slice($array, $offset, $length));
10316
    }
10317
10318
    /**
     * Binary safe comparison of two strings from an offset, up to length characters.
     *
     * <p>If an offset or a length is given, $str1 is first reduced to the requested slice and
     * $str2 is cut to the (character) length of that slice; afterwards both values are compared.</p>
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
     *                                     of the length of the str compared to the length of main_str less the
     *                                     offset.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);

                // The slice of $str1 must be measured in the same encoding that is used to cut $str2,
                // otherwise the character count is taken with the UTF-8 default and both sides can
                // end up with a different number of characters.
                $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1, $encoding), $encoding);
            }
        }

        if ($case_insensitivity === true) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
10371
10372
    /**
     * Count how often a substring occurs within a string.
     *
     * @see  http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack  <p>The string to search in.</p>
     * @param string   $needle    <p>The substring to search for.</p>
     * @param int      $offset    [optional] <p>The offset where to start counting.</p>
     * @param int|null $length    [optional] <p>
     *                            The maximum length after the specified offset to search for the
     *                            substring.
     *                            </p>
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   <p>The number of occurrences; <strong>false</strong> if $haystack or $needle is empty,
     *                   or if the length of $haystack could not be determined.</p>
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        // nothing to search in or nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // an explicit window of zero characters cannot contain a match
        if ($length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }
        $isUtf8 = $encoding === 'UTF-8';

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // reduce the haystack to the requested window
        if ($offset !== 0 || $length > 0) {
            if ($length === null) {
                $haystackLength = self::strlen($haystack, $encoding);
                if ($haystackLength === false) {
                    return false;
                }
                $length = (int) $haystackLength;
            }

            $haystack = $isUtf8
                ? (string) \mb_substr($haystack, $offset, $length)
                : (string) \mb_substr($haystack, $offset, $length, $encoding);
        }

        if (
            !$isUtf8
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            return $isUtf8
                ? \mb_substr_count($haystack, $needle)
                : \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback via vanilla php: count the matches of the quoted needle
        $pattern = '/' . \preg_quote($needle, '/') . '/us';
        \preg_match_all($pattern, $haystack, $occurrences, \PREG_SET_ORDER);

        return \count($occurrences);
    }
10453
10454
    /**
     * Count the number of substring occurrences, working on bytes.
     *
     * @param string   $haystack <p>
     *                           The string being checked.
     *                           </p>
     * @param string   $needle   <p>
     *                           The string being found.
     *                           </p>
     * @param int      $offset   [optional] <p>
     *                           The offset where to start counting
     *                           </p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring.
     *                           </p>
     *
     * @return false|int
     *                   <p>The number of times the needle substring occurs in the haystack string;
     *                   <strong>0</strong> if $haystack or $needle is empty.</p>
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($needle === '' || $haystack === '') {
            return 0;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // with function overloading the window is cut manually, before counting
            if ($offset !== 0 || $length !== null) {
                if ($length === null) {
                    $haystackLength = self::strlen($haystack);
                    if ($haystackLength === false) {
                        return false;
                    }
                    $length = (int) $haystackLength;
                }

                $isEmptyWindow = $length !== 0
                                 && $offset !== 0
                                 && ($length + $offset) <= 0;
                if (
                    $isEmptyWindow
                    &&
                    Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
                ) {
                    return false;
                }

                $window = \substr($haystack, $offset, $length);
                $haystack = $window === false ? '' : (string) $window;
            }

            // "mb_" is available if overload is used, so use it ...
            return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
        }

        return $length === null
            ? \substr_count($haystack, $needle, $offset)
            : \substr_count($haystack, $needle, $offset, $length);
    }
10531
10532
    /**
     * Returns how often $substring occurs in the given string.
     *
     * <p>The comparison is case-sensitive by default; pass $caseSensitive = false
     * to ignore the case.</p>
     *
     * @param string $str           <p>The input string.</p>
     * @param string $substring     <p>The substring to search for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int <p>The number of occurrences; 0 if $str or $substring is empty.</p>
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $caseSensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($substring === '' || $str === '') {
            return 0;
        }

        if ($encoding === 'UTF-8') {
            if (!$caseSensitive) {
                // compare both values in upper-case
                $str = \mb_strtoupper($str);
                $substring = \mb_strtoupper($substring);
            }

            return (int) \mb_substr_count($str, $substring);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (!$caseSensitive) {
            // compare the case-folded variants of both values
            $str = self::strtocasefold($str, true, false, $encoding, null, false);
            $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
        }

        return (int) \mb_substr_count($str, $substring, $encoding);
    }
10578
10579
    /**
     * Strips a prefix ($needle) from the start of the string ($haystack), ignoring the case.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not start with it.</p>
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        if ($haystack === '') {
            return '';
        }

        // nothing to strip, or the haystack does not begin with the needle
        if (
            $needle === ''
            ||
            self::str_istarts_with($haystack, $needle) !== true
        ) {
            return $haystack;
        }

        $prefixLength = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefixLength);
    }
10603
10604
    /**
     * Get part of a string, processed in bytes.
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first position used in str.</p>
     * @param int|null $length [optional] <p>The maximum length of the returned string,
     *                         null means "up to the end of the string".</p>
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters, as returned by "substr()" (or by "mb_substr()" with an
     *                      8-bit charset, if mbstring function overloading is active).</p>
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        // No length given: omit the argument instead of passing a hard-coded 32-bit maximum,
        // which would cut off strings that are longer than 2147483647 bytes.
        if ($length === null) {
            return \substr($str, $offset);
        }

        return \substr($str, $offset, $length);
    }
10635
10636
    /**
     * Strips a suffix ($needle) from the end of the string ($haystack), ignoring the case.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     *
     * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not end with it.</p>
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        if ($haystack === '') {
            return '';
        }

        // nothing to strip, or the haystack does not end with the needle
        if (
            $needle === ''
            ||
            self::str_iends_with($haystack, $needle) !== true
        ) {
            return $haystack;
        }

        // number of leading characters that remain once the suffix is gone
        $keepLength = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $keepLength);
    }
10660
10661
    /**
     * Strips a prefix ($needle) from the start of the string ($haystack).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not start with it.</p>
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        if ($haystack === '') {
            return '';
        }

        // nothing to strip, or the haystack does not begin with the needle
        if (
            $needle === ''
            ||
            self::str_starts_with($haystack, $needle) !== true
        ) {
            return $haystack;
        }

        $prefixLength = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefixLength);
    }
10685
10686
    /**
     * Replace text within a portion of a string.
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * <p>If $str is an array, every element is processed on its own (with the matching element of
     * $replacement, $offset and $length) and an array is returned.</p>
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given, then it will default to strlen(
     *                                     string ); i.e. end the replacing at the end of string. Of course, if
     *                                     length is zero then this function will have the effect of inserting
     *                                     replacement into string at the given start offset.</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \InvalidArgumentException <p>If $offset or $length is an array while $str is not.</p>
     *
     * @return string|string[] The result string is returned. If string is an array then array is returned.
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str) === true) {
            $num = \count($str);

            // the replacement: one entry per input string
            if (\is_array($replacement) === true) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset: non-integer entries fall back to 0
            if (\is_array($offset) === true) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$valueTmp) {
                    $valueTmp = (int) $valueTmp === $valueTmp ? $valueTmp : 0;
                }
                unset($valueTmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length: non-integer entries fall back to $num
            if ($length === null) {
                // NOTE: in array-mode a missing length is filled with 0 (insert without removing anything)
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length) === true) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$valueTmpV2) {
                    $valueTmpV2 = (int) $valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
                }
                unset($valueTmpV2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call: the encoding must be forwarded explicitly,
            // otherwise every element would be processed with the "UTF-8" default
            return \array_map(
                static function ($strTmp, $replacementTmp, $offsetTmp, $lengthTmp) use ($encoding) {
                    return self::substr_replace($strTmp, $replacementTmp, $offsetTmp, $lengthTmp, $encoding);
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        // a single input string only uses the first replacement
        if (\is_array($replacement) === true) {
            if (\count($replacement) > 0) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length) === true) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset) === true) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into the range [0, $string_length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // translate a negative / missing length into an absolute number of characters
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            // the replaced part must not reach beyond the end of the string
            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // head + replacement + tail
            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into single characters
        \preg_match_all('/./us', $str, $smatches);
        \preg_match_all('/./us', $replacement, $rmatches);

        if ($length === null) {
            $lengthTmp = self::strlen($str, $encoding);
            if ($lengthTmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $lengthTmp;
        }

        \array_splice($smatches[0], $offset, $length, $rmatches[0]);

        return \implode('', $smatches[0]);
    }
10836
10837
    /**
     * Strips a suffix ($needle) from the end of the string ($haystack).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not end with it.</p>
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        if ($haystack === '') {
            return '';
        }

        if ($needle === '') {
            return $haystack;
        }

        // byte-wise check: does the haystack end with the needle?
        $endsWithNeedle = \substr($haystack, -\strlen($needle)) === $needle;
        if (!$endsWithNeedle) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            $keepLength = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keepLength);
        }

        $keepLength = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $keepLength, $encoding);
    }
10878
10879
    /**
     * Returns a case swapped version of the string.
     *
     * <p>The swap is done via a byte-wise XOR of the lower-case, the upper-case and the original string.
     * For UTF-8 input this only works if all three strings have the same byte length, so a
     * per-character swap is used whenever the lengths differ (e.g. "ı" => "I").</p>
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string each character's case swapped
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // remove invalid byte sequences before the case conversion
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            $lower = \mb_strtolower($str);
            $upper = \mb_strtoupper($str);
            $byteLength = \strlen($str);

            // fast path: the XOR trick is only valid if the bytes of all three strings line up
            if (\strlen($lower) === $byteLength && \strlen($upper) === $byteLength) {
                return (string) ($lower ^ $upper ^ $str);
            }

            // A case mapping changed the byte length, the XOR would mix up unrelated bytes and
            // truncate the result, so swap the case character by character.
            $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);
            if ($chars === false) {
                // not splittable (e.g. invalid UTF-8): keep the previous behaviour
                return (string) ($lower ^ $upper ^ $str);
            }

            $swapped = '';
            foreach ($chars as $char) {
                $charLower = \mb_strtolower($char);
                // already lower-case (or caseless) => upper-case it, otherwise lower-case it
                $swapped .= $char === $charLower ? \mb_strtoupper($char) : $charLower;
            }

            return $swapped;
        }

        // NOTE: other encodings still use the byte-wise XOR of the converted strings
        return (string) (self::strtolower($str, $encoding) ^ self::strtoupper($str, $encoding) ^ $str);
    }
10906
10907
    /**
     * Checks whether symfony-polyfills are used.
     *
     * <p>A polyfill is assumed if the "mbstring" or the "iconv" extension is not loaded,
     * but one of its functions ("mb_strlen" / "iconv") exists anyway.</p>
     *
     * @return bool
     *              <strong>true</strong> if in use, <strong>false</strong> otherwise
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstringPolyfilled = \extension_loaded('mbstring') === false
                              && \function_exists('mb_strlen');

        $iconvPolyfilled = \extension_loaded('iconv') === false
                           && \function_exists('iconv');

        return $mbstringPolyfilled || $iconvPolyfilled;
    }
10930
10931
    /**
     * Replaces every tab character with a run of spaces.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength [optional] <p>Number of spaces used for each tab. Default: 4</p>
     *
     * @return string <p>The string with all tabs replaced.</p>
     */
    public static function tabs_to_spaces(string $str, int $tabLength = 4): string
    {
        // one run of spaces, as wide as a single tab should be
        $indent = \str_repeat(' ', $tabLength);

        return \str_replace("\t", $indent, $str);
    }
10949
10950
    /**
     * Converts the first character of each word in the string to uppercase
     * and all other chars to lowercase.
     *
     * <p>Without a language and without $tryToKeepStringLength the conversion is done by
     * "mb_convert_case()", otherwise it is delegated to "UTF8::str_titleize()".</p>
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string with all characters of $str being title-cased
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            // remove invalid byte sequences before the case conversion
            $str = self::clean($str);
        }

        // language specific handling / length preservation is done by "str_titleize()"
        if ($lang !== null || $tryToKeepStringLength !== false) {
            return self::str_titleize($str, null, $encoding, false, $lang, $tryToKeepStringLength, false);
        }

        if ($encoding === 'UTF-8') {
            return \mb_convert_case($str, \MB_CASE_TITLE);
        }

        $normalizedEncoding = self::normalize_encoding($encoding, 'UTF-8');

        return \mb_convert_case($str, \MB_CASE_TITLE, $normalizedEncoding);
    }
10987
10988
    /**
     * Deprecated alias, forwards all arguments to "UTF8::to_ascii()".
     *
     * @see        UTF8::to_ascii()
     *
     * @param string $str
     * @param string $subst_chr
     * @param bool   $strict
     *
     * @return string <p>The result of "UTF8::to_ascii()".</p>
     *
     * @deprecated <p>use "UTF8::to_ascii()"</p>
     */
    public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
11005
11006
    /**
     * Deprecated alias, forwards the argument to "UTF8::to_iso8859()".
     *
     * @see        UTF8::to_iso8859()
     *
     * @param string|string[] $str
     *
     * @return string|string[] <p>The result of "UTF8::to_iso8859()".</p>
     *
     * @deprecated <p>use "UTF8::to_iso8859()"</p>
     */
    public static function toIso8859($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
11021
11022
    /**
     * Deprecated alias, forwards the argument to "UTF8::to_latin1()".
     *
     * @see        UTF8::to_latin1()
     *
     * @param string|string[] $str
     *
     * @return string|string[] <p>The result of "UTF8::to_latin1()".</p>
     *
     * @deprecated <p>use "UTF8::to_latin1()"</p>
     */
    public static function toLatin1($str)
    {
        $converted = self::to_latin1($str);

        return $converted;
    }
11037
11038
    /**
     * Deprecated camelCase alias of "UTF8::to_utf8()".
     *
     * <p>Only the string is forwarded; the optional second argument of "UTF8::to_utf8()" keeps its
     * default.</p>
     *
     * @see        UTF8::to_utf8()
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     *
     * @deprecated <p>use "UTF8::to_utf8()"</p>
     */
    public static function toUTF8($str)
    {
        // pure delegation, nothing is done here
        $converted = self::to_utf8($str);

        return $converted;
    }
11053
11054
    /**
     * Convert a string into ASCII.
     *
     * <p>ASCII-only input is returned unchanged. Any other input is passed through "UTF8::clean()" and then
     * transliterated character by character with lookup tables that are loaded on demand; a character
     * without a table entry is replaced by $unknown.</p>
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Replacement for characters that cannot be transliterated.
     *                        (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @return string
     */
    public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
    {
        // transliteration tables, kept across calls and indexed by "bank" (code point >> 8)
        static $UTF8_TO_ASCII;

        if ($str === '') {
            return '';
        }

        // check if we only have ASCII, first (better performance)
        if (self::is_ascii($str) === true) {
            return $str;
        }

        $str = self::clean(
            $str,
            true,
            true,
            true,
            false,
            true,
            true
        );

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
            return $str;
        }

        if (
            $strict === true
            &&
            self::$SUPPORT['intl'] === true
        ) {
            // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
            /** @noinspection PhpComposerExtensionStubsInspection */
            /** @noinspection UnnecessaryCastingInspection */
            $str = (string) \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

            // check again, if we only have ASCII, now ...
            if (self::is_ascii($str) === true) {
                return $str;
            }
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
        $chars = $ar[0];
        foreach ($chars as &$c) {
            $ordC0 = self::$ORD[$c[0]];

            // ASCII - next please
            if ($ordC0 >= 0 && $ordC0 <= 127) {
                continue;
            }

            // 0xFE and 0xFF never start a sequence: reject them before any further byte is read
            if ($ordC0 === 254 || $ordC0 === 255) {
                $c = $unknown;

                continue;
            }

            // reset per character, otherwise an unmatched lead byte would silently
            // reuse the code point of the previous character
            $ord = null;

            if ($ordC0 >= 192) {
                $ordC1 = self::$ORD[$c[1]];

                if ($ordC0 <= 223) {
                    // 2 bytes
                    $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
                } else {
                    $ordC2 = self::$ORD[$c[2]];

                    if ($ordC0 <= 239) {
                        // 3 bytes
                        $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
                    } else {
                        $ordC3 = self::$ORD[$c[3]];

                        if ($ordC0 <= 247) {
                            // 4 bytes
                            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
                        } else {
                            $ordC4 = self::$ORD[$c[4]];

                            if ($ordC0 <= 251) {
                                // 5 bytes
                                $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
                            } else {
                                // 6 bytes (lead byte 252 or 253, 254 / 255 were rejected above)
                                $ordC5 = self::$ORD[$c[5]];

                                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
                            }
                        }
                    }
                }
            }

            // lead byte 128 - 191: not the start of a sequence
            if ($ord === null) {
                $c = $unknown;

                continue;
            }

            $bank = $ord >> 8;
            if (!isset($UTF8_TO_ASCII[$bank])) {
                $UTF8_TO_ASCII[$bank] = self::getDataIfExists(\sprintf('x%02x', $bank));
                if ($UTF8_TO_ASCII[$bank] === false) {
                    // no table for this bank: remember that, so it is not looked up again
                    $UTF8_TO_ASCII[$bank] = [];
                }
            }

            $newchar = $ord & 255;

            // debugging hint: dump sprintf('x%02x', $bank), $c, $ord and $newchar here
            // to find characters that are missing in the tables
            $c = $UTF8_TO_ASCII[$bank][$newchar] ?? $unknown;
        }
        // break the reference, so that a later write to $c cannot change the last element
        unset($c);

        return \implode('', $chars);
    }
11214
11215
    /**
     * Interpret a value as boolean.
     *
     * <p>The value is cast to string first. The empty string is false. "true", "1", "on", "yes" are true and
     * "false", "0", "off", "no" are false (tried as given, then lowercased). Any other numeric string is
     * true if it is greater than zero. Everything else is cast to bool after "trim()".</p>
     *
     * @param mixed $str
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        $value = (string) $str;
        if ($value === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $keywords = [
            'true'  => true,
            '1'     => true,
            'on'    => true,
            'yes'   => true,
            'false' => false,
            '0'     => false,
            'off'   => false,
            'no'    => false,
        ];

        // exact spelling first, lowercased spelling second
        foreach ([$value, \strtolower($value)] as $candidate) {
            if (isset($keywords[$candidate])) {
                return $keywords[$candidate];
            }
        }

        if (\is_numeric($value)) {
            return (float) $value > 0;
        }

        return (bool) \trim($value);
    }
11256
11257
    /**
     * Convert given string to safe filename (and keep string case).
     *
     * <p>Every character except a-z, A-Z, 0-9, ".", "-", whitespace and $fallback_char is removed, each
     * run of whitespace becomes one $fallback_char, repeated $fallback_char's are collapsed and
     * $fallback_char is trimmed from both ends.</p>
     *
     * @param string $string            <p>The input string.</p>
     * @param bool   $use_transliterate <p>No transliteration is done by default; set to true to run the
     *                                  string through "UTF8::str_transliterate()" first.</p>
     * @param string $fallback_char     <p>Replacement for whitespace. An empty string removes the
     *                                  whitespace instead.</p>
     *
     * @return string
     */
    public static function to_filename(string $string, bool $use_transliterate = false, string $fallback_char = '-'): string
    {
        if ($use_transliterate === true) {
            $string = self::str_transliterate($string, $fallback_char);
        }

        $fallback_char_escaped = \preg_quote($fallback_char, '/');

        $patterns = [
            '/[^' . $fallback_char_escaped . '\.\-a-zA-Z0-9\s]/', // 1) remove un-needed chars
            '/[\s]+/u',                                           // 2) convert spaces to $fallback_char
        ];
        $replacements = [
            '',
            $fallback_char,
        ];

        // an empty $fallback_char would produce the invalid pattern "/[]+/u": "preg_replace()" then
        // returns null for the whole call and the result would always be an empty string
        if ($fallback_char === '') {
            return (string) \preg_replace($patterns, $replacements, $string);
        }

        $patterns[] = '/[' . $fallback_char_escaped . ']+/u';     // 3) remove double $fallback_char's
        $replacements[] = $fallback_char;

        $string = (string) \preg_replace($patterns, $replacements, $string);

        // trim "$fallback_char" from beginning and end of the string
        return \trim($string, $fallback_char);
    }
11292
11293
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * <p>Arrays are converted element by element (recursively), the keys are kept.</p>
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        $str = (string) $str;

        return $str === '' ? '' : self::utf8_decode($str);
    }
11317
11318
    /**
     * Alias of "UTF8::to_iso8859()".
     *
     * @see UTF8::to_iso8859()
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     */
    public static function to_latin1($str)
    {
        // pure delegation, nothing is done here
        $converted = self::to_iso8859($str);

        return $converted;
    }
11331
11332
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decodes unicode escape sequences ("\uXXXX", including surrogate pairs) and, on request,
     * html-entities.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you may need to use "UTF8::clean()" for
     * this case.</li>
     * </ul>
     *
     * @param string|string[] $str                    <p>Any string or array.</p>
     * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @return string|string[] the UTF-8 encoded string
     */
    public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
    {
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
            }

            return $str;
        }

        $str = (string) $str;
        if ($str === '') {
            return $str;
        }

        $length = \strlen($str);
        $buf = '';
        $pos = 0;

        while ($pos < $length) {
            $lead = $str[$pos];
            ++$pos;

            // 7-bit byte: it doesn't need conversion
            if ($lead < "\x80") {
                $buf .= $lead;

                continue;
            }

            // how many continuation bytes would a UTF-8 sequence with this lead byte have?
            if ($lead >= "\xC0" && $lead <= "\xDF") {
                $expected = 1; // looks like 2 bytes UTF8
            } elseif ($lead >= "\xE0" && $lead <= "\xEF") {
                $expected = 2; // looks like 3 bytes UTF8
            } elseif ($lead >= "\xF0" && $lead <= "\xF7") {
                $expected = 3; // looks like 4 bytes UTF8
            } else {
                $expected = 0; // stray continuation byte or 0xF8 - 0xFF: always converted
            }

            // collect the continuation bytes, a missing byte at the end of the string counts as "\x00"
            $tail = '';
            for ($offset = 0; $offset < $expected; ++$offset) {
                $next = $pos + $offset < $length ? $str[$pos + $offset] : "\x00";
                if ($next < "\x80" || $next > "\xBF") {
                    $tail = null;

                    break;
                }

                $tail .= $next;
            }

            if ($expected > 0 && $tail !== null) {
                // yeah, almost sure it's UTF8 already
                $buf .= $lead . $tail;
                $pos += $expected;
            } else {
                // not valid UTF8 - convert the single byte
                $buf .= self::to_utf8_convert_helper($lead);
            }
        }

        // decode unicode escape sequences + unicode surrogate pairs
        $buf = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // single "\uXXXX"
                    $cp = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair: http://unicode.org/faq/utf_bom.html#utf16-4
                    $high = (int) \hexdec($matches[1]);
                    $low = (int) \hexdec($matches[2]);
                    $cp = 0x10000 + (($high - 0xD800) << 10) + ($low - 0xDC00);
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($cp < 0x80) {
                    return (string) self::chr($cp);
                }

                if ($cp < 0xA0) {
                    // NOTE(review): "self::chr()" is called with byte values here, confirm that it
                    // returns the raw byte and not the UTF-8 encoding of that code point
                    /** @noinspection UnnecessaryCastingInspection */
                    return (string) self::chr(0xC0 | ($cp >> 6)) . (string) self::chr(0x80 | ($cp & 0x3F));
                }

                return self::decimal_to_chr($cp);
            },
            $buf
        );

        if ($buf === null) {
            return '';
        }

        // decode html-entities
        if ($decodeHtmlEntityToUtf8 === true) {
            $buf = self::html_entity_decode($buf);
        }

        return $buf;
    }
11465
11466
    /**
     * Strip whitespace or other characters from beginning or end of a UTF-8 string.
     *
     * INFO: This is slower than "trim()"
     *
     * We can only use the original function, if we use <= 7-Bit in the string / chars
     * but the check for ASCII (7-Bit) costs more time than we can save here.
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars [optional] <p>Optional characters to be stripped; null or an empty string
     *                           strips whitespace.</p>
     *
     * @return string the trimmed string
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // explicit comparison: a truthiness check would treat the char list "0" as "not given"
        // and strip whitespace instead of zeros
        if ($chars !== null && $chars !== '') {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = '^[\s]+|[\s]+$';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
11499
11500
    /**
     * Makes string's first char uppercase.
     *
     * <p>The string is split into its first character and the rest; only the first character is
     * converted, the rest is appended unchanged.</p>
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // drop invalid bytes first, so that the "first character" is a real character
            $str = self::clean($str);
        }

        // without language / length requirements the plain "mb_strtoupper()" is sufficient
        $plainUppercase = $lang === null && $tryToKeepStringLength === false;

        if ($encoding === 'UTF-8') {
            $rest = (string) \mb_substr($str, 1);
            $first = (string) \mb_substr($str, 0, 1);

            $first = $plainUppercase === true
                ? \mb_strtoupper($first)
                : self::strtoupper($first, $encoding, false, $lang, $tryToKeepStringLength);

            return $first . $rest;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $rest = (string) self::substr($str, 1, null, $encoding);

        if ($plainUppercase === true) {
            $first = \mb_strtoupper(
                (string) \mb_substr($str, 0, 1, $encoding),
                $encoding
            );
        } else {
            $first = self::strtoupper(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $tryToKeepStringLength
            );
        }

        return $first . $rest;
    }
11569
11570
    /**
     * Alias of "UTF8::ucfirst()".
     *
     * <p>Only the first three arguments are forwarded; the language and string-length options of
     * "UTF8::ucfirst()" keep their defaults.</p>
     *
     * @see UTF8::ucfirst()
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset.</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     */
    public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        // pure delegation, nothing is done here
        return self::ucfirst(
            $str,
            $encoding,
            $cleanUtf8
        );
    }
11585
11586
    /**
     * Uppercase for all words in the string.
     *
     * <p>Only the first character of each word is changed. ASCII-only input without exceptions and
     * without an extra charlist is handed to the native "ucwords()".</p>
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Words that are left untouched (exact match).</p>
     * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ): string {
        // strict comparison: "0" is a valid input and must not be turned into an empty string
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // strict comparison again: a charlist of "0" is a real charlist
        $usePhpDefaultFunctions = $charlist === '' && \implode('', $exceptions) === '';

        if (
            $usePhpDefaultFunctions === true
            &&
            self::is_ascii($str) === true
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        foreach ($words as &$word) {
            // "" and "0" have nothing to uppercase
            if (!$word) {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $word = self::ucfirst($word, $encoding);
            }
        }
        // break the reference, so that a later write to $word cannot change the last element
        unset($word);

        return \implode('', $words);
    }
11647
11648
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>true: decode as often as possible (until the string does not change
     *                             anymore) | false: decode exactly once.</p>
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // nothing that could be url-, entity- or unicode-escaped: only repair broken UTF-8
        if (
            \strpos($str, '&') === false
            &&
            \strpos($str, '%') === false
            &&
            \strpos($str, '+') === false
            &&
            \strpos($str, '\u') === false
        ) {
            return self::fix_simple_utf8($str);
        }

        // non-standard "%uXXXX" escapes are rewritten to hex entities, the entity decoding below resolves them
        $pattern = '/%u([0-9a-fA-F]{3,4})/';
        if (\preg_match($pattern, $str)) {
            $str = (string) \preg_replace($pattern, '&#x\\1;', \urldecode($str));
        }

        $flags = \ENT_QUOTES | \ENT_HTML5;

        // always decode once, before, "$multi_decode === false" skipped the decoding completely;
        // keep going while the string still changes only if multi decoding is requested
        do {
            $str_compare = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::fix_simple_utf8(
                \urldecode(
                    self::html_entity_decode(
                        self::to_utf8($str),
                        $flags
                    )
                )
            );
        } while ($multi_decode === true && $str_compare !== $str);

        return $str;
    }
11712
11713
    /**
     * Return an array with "urlencoded"-win1252 -> UTF-8
     *
     * <p>The keys are "%20" to "%FF" (uppercase hex, ascending), the values are the decoded characters.</p>
     *
     * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
     *
     * @return string[]
     */
    public static function urldecode_fix_win1252_chars(): array
    {
        $map = [];

        // %20 - %7E: printable ASCII, every byte decodes to itself
        for ($byte = 0x20; $byte <= 0x7E; ++$byte) {
            $map[\sprintf('%%%02X', $byte)] = \chr($byte);
        }

        // %7F - %FF: Windows-1252; bytes without a character (%81, %8D, %8F, %90, %9D) map to ''
        // NOTE(review): %7F, %A0 and %AD are empty strings in the reviewed copy of this table, confirm
        // against the original file that no invisible characters (DEL, NBSP, soft hyphen) got lost
        return $map + [
            '%7F' => '',
            '%80' => '€', // was a backtick, which is 0x60 and already covered by "%60"
            '%81' => '',
            '%82' => '‚',
            '%83' => 'ƒ',
            '%84' => '„',
            '%85' => '…',
            '%86' => '†',
            '%87' => '‡',
            '%88' => 'ˆ',
            '%89' => '‰',
            '%8A' => 'Š',
            '%8B' => '‹',
            '%8C' => 'Œ',
            '%8D' => '',
            '%8E' => 'Ž',
            '%8F' => '',
            '%90' => '',
            '%91' => '‘',
            '%92' => '’',
            '%93' => '“',
            '%94' => '”',
            '%95' => '•',
            '%96' => '–',
            '%97' => '—',
            '%98' => '˜',
            '%99' => '™',
            '%9A' => 'š',
            '%9B' => '›',
            '%9C' => 'œ',
            '%9D' => '',
            '%9E' => 'ž',
            '%9F' => 'Ÿ',
            '%A0' => '',
            '%A1' => '¡',
            '%A2' => '¢',
            '%A3' => '£',
            '%A4' => '¤',
            '%A5' => '¥',
            '%A6' => '¦',
            '%A7' => '§',
            '%A8' => '¨',
            '%A9' => '©',
            '%AA' => 'ª',
            '%AB' => '«',
            '%AC' => '¬',
            '%AD' => '',
            '%AE' => '®',
            '%AF' => '¯',
            '%B0' => '°',
            '%B1' => '±',
            '%B2' => '²',
            '%B3' => '³',
            '%B4' => '´',
            '%B5' => 'µ',
            '%B6' => '¶',
            '%B7' => '·',
            '%B8' => '¸',
            '%B9' => '¹',
            '%BA' => 'º',
            '%BB' => '»',
            '%BC' => '¼',
            '%BD' => '½',
            '%BE' => '¾',
            '%BF' => '¿',
            '%C0' => 'À',
            '%C1' => 'Á',
            '%C2' => 'Â',
            '%C3' => 'Ã',
            '%C4' => 'Ä',
            '%C5' => 'Å',
            '%C6' => 'Æ',
            '%C7' => 'Ç',
            '%C8' => 'È',
            '%C9' => 'É',
            '%CA' => 'Ê',
            '%CB' => 'Ë',
            '%CC' => 'Ì',
            '%CD' => 'Í',
            '%CE' => 'Î',
            '%CF' => 'Ï',
            '%D0' => 'Ð',
            '%D1' => 'Ñ',
            '%D2' => 'Ò',
            '%D3' => 'Ó',
            '%D4' => 'Ô',
            '%D5' => 'Õ',
            '%D6' => 'Ö',
            '%D7' => '×',
            '%D8' => 'Ø',
            '%D9' => 'Ù',
            '%DA' => 'Ú',
            '%DB' => 'Û',
            '%DC' => 'Ü',
            '%DD' => 'Ý',
            '%DE' => 'Þ',
            '%DF' => 'ß',
            '%E0' => 'à',
            '%E1' => 'á',
            '%E2' => 'â',
            '%E3' => 'ã',
            '%E4' => 'ä',
            '%E5' => 'å',
            '%E6' => 'æ',
            '%E7' => 'ç',
            '%E8' => 'è',
            '%E9' => 'é',
            '%EA' => 'ê',
            '%EB' => 'ë',
            '%EC' => 'ì',
            '%ED' => 'í',
            '%EE' => 'î',
            '%EF' => 'ï',
            '%F0' => 'ð',
            '%F1' => 'ñ',
            '%F2' => 'ò',
            '%F3' => 'ó',
            '%F4' => 'ô',
            '%F5' => 'õ',
            '%F6' => 'ö',
            '%F7' => '÷',
            '%F8' => 'ø',
            '%F9' => 'ù',
            '%FA' => 'ú',
            '%FB' => 'û',
            '%FC' => 'ü',
            '%FD' => 'ý',
            '%FE' => 'þ',
            '%FF' => 'ÿ',
        ];
    }
11949
11950
    /**
     * Decodes an UTF-8 string to ISO-8859-1.
     *
     * Two-byte sequences are collapsed into a single byte when the resulting
     * code point is below 256. Three- and four-byte sequences, as well as
     * two-byte sequences outside that range, are replaced by "?".
     *
     * @param string $str           <p>The input string.</p>
     * @param bool   $keepUtf8Chars <p>
     *                              If true, the unmodified input is returned whenever self::strlen() of the
     *                              decoded result is not smaller than self::strlen() of the input.
     *                              </p>
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // save for later comparison
        $str_backup = $str;
        $len = \strlen($str);

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $noCharFound = '?';

        // $i is the read offset, $j the write offset; the string is rewritten
        // in place and $j never runs ahead of $i.
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            // string bitwise AND: keeps only the high nibble of the lead byte
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    if ($i + 1 >= $len) {
                        // truncated 2-byte sequence at the end of the input:
                        // there is no continuation byte to read
                        $str[$j] = $noCharFound;

                        break;
                    }

                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // 4-byte sequence: skip one byte more than the 3-byte case below
                    ++$i;

                // no break

                case "\xE0":
                    $str[$j] = $noCharFound;
                    $i += 2;

                    break;

                default:
                    $str[$j] = $str[$i];
            }
        }

        // only the first $j bytes hold decoded output
        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keepUtf8Chars === true
            &&
            self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
12019
12020
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * Thin wrapper around the global \utf8_encode() that maps an empty input
     * and a false result to an empty string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $encoded = \utf8_encode($str);

        // a polyfill implementation may hand back false
        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        return $encoded === false ? '' : $encoded;
    }
12044
12045
    /**
     * fix -> utf8-win1252 chars
     *
     * Kept only as an alias: the call is forwarded unchanged to self::fix_simple_utf8().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
12058
12059
    /**
     * Returns an array with all utf8 whitespace characters.
     *
     * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @author: Derek E.
     *
     * @return string[]
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
     *                  as defined in above URL
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
12074
12075
    /**
     * Limit the number of words in a string.
     *
     * A "word" is a run of non-whitespace characters as matched by "\S". When
     * the input holds more than $limit words, the leading part is cut after
     * the last allowed word, right-trimmed, and $strAddOn is appended.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $limit    <p>The limit of words as integer.</p>
     * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
     *
     * @return string
     *                <p>An empty string for empty input or a limit below 1, the unchanged input when nothing
     *                had to be cut, otherwise the shortened string.</p>
     */
    public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
    {
        if ($limit < 1 || $str === '') {
            return '';
        }

        // optional leading whitespace, then between 1 and $limit words (each with its trailing whitespace)
        $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
        \preg_match($pattern, $str, $found);

        // no word found at all: hand the input back untouched
        if (!isset($found[0])) {
            return $str;
        }

        $head = $found[0];

        // the match already spans the complete input: nothing was cut off
        if (\mb_strlen($str) === (int) \mb_strlen($head)) {
            return $str;
        }

        return \rtrim($head) . $strAddOn;
    }
12102
12103
    /**
     * Wraps a string to a given number of characters
     *
     * The input is translated into a single-byte "mask" (one byte per
     * character), the native \wordwrap() decides on that mask where the
     * breaks go, and the result is rebuilt from the original characters.
     *
     * @see  http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @return string
     *                <p>The given string wrapped at the specified column.
     *                An empty string if $str or $break is empty.</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($str === '' || $break === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }

        // $chars holds the real characters (an existing break counts as one entry),
        // $mask holds one byte per entry: '#' for a break, ' ' for a space, '?' otherwise.
        $chars = [];
        $mask = '';
        foreach ($segments as $segmentIndex => $segment) {
            if ($segmentIndex) {
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $mask = \wordwrap($mask, $width, '#', $cut);

        $result = '';
        $charIndex = 0;
        $maskIndex = -1;
        $breakPos = -1;

        // walk from one '#' in the wrapped mask to the next
        while (($breakPos = \mb_strpos($mask, '#', $breakPos + 1)) !== false) {
            // copy (and consume) every character in front of this break
            for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
                $result .= $chars[$charIndex];
                unset($chars[$charIndex++]);
            }

            // the break replaces an original break or a space; a break inserted
            // into a cut word consumes no character
            if (
                $chars[$charIndex] === $break
                ||
                $chars[$charIndex] === ' '
            ) {
                unset($chars[$charIndex++]);
            }

            $result .= $break;
        }

        // whatever is left belongs to the last line
        return $result . \implode('', $chars);
    }
12173
12174
    /**
     * Line-Wrap the string after $limit, but also after the next word.
     *
     * The input is split on "\r\n", "\r" and "\n"; every resulting line is
     * wrapped on its own with self::wordwrap(), so that $limit is measured in
     * characters and not in bytes. Each line of the result, including the
     * last one, is terminated with "\n".
     *
     * @param string $str
     * @param int    $limit
     *
     * @return string
     */
    public static function wordwrap_per_line(string $str, int $limit): string
    {
        $lines = \preg_split('/\\r\\n|\\r|\\n/', $str);
        if ($lines === false) {
            // the split failed: there is nothing to wrap
            return '';
        }

        $wrapped = '';
        foreach ($lines as $line) {
            // the multi-byte aware wrapper counts characters, the native \wordwrap() counts bytes
            $wrapped .= self::wordwrap($line, $limit);
            $wrapped .= "\n";
        }

        return $wrapped;
    }
12198
12199
    /**
     * Returns an array of Unicode White Space characters.
     *
     * @return string[] an array with numeric code point as key and White Space Character as value
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
12208
12209
    /**
     * Fills the emoji lookup caches on first use.
     *
     * The emoji table is sorted by key length (longest key first) before the
     * key, value and "reversible" placeholder caches are built from it.
     *
     * @return true|null true when the caches were built by this call, null when they already existed
     */
    private static function initEmojiData()
    {
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        // longest keys first
        \uksort(
            self::$EMOJI,
            static function (string $left, string $right): int {
                return \strlen($right) <=> \strlen($left);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = \array_values(self::$EMOJI);

        // one placeholder per key: the crc32 of the key followed by the same digits reversed
        foreach (self::$EMOJI_KEYS_CACHE as $emojiKey) {
            $checksum = \crc32($emojiKey);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev((string) $checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
12239
12240
    /**
     * Applies the case-folding replacement tables to a string.
     *
     * The "upper"/"lower" lists of self::$COMMON_CASE_FOLD are always applied;
     * the table loaded from the "caseFolding_full" data file is applied in
     * addition when $fullCaseFold is truthy.
     *
     * @param string $str
     * @param bool   $useLower     <p>Use uppercase by default, otherwise use lowercase.</p>
     * @param bool   $fullCaseFold <p>Convert not only common cases.</p>
     *
     * @return string
     */
    private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
    {
        $upper = self::$COMMON_CASE_FOLD['upper'];
        $lower = self::$COMMON_CASE_FOLD['lower'];

        // only a strict boolean true selects the lowercase direction
        $toLower = $useLower === true;

        $str = $toLower
            ? \str_replace($upper, $lower, $str)
            : \str_replace($lower, $upper, $str);

        if (!$fullCaseFold) {
            return $str;
        }

        // loaded once per request and kept in a function-static variable
        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        return $toLower
            ? \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str)
            : \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
    }
12281
12282
    /**
     * get data from "/data/*.php"
     *
     * Includes the file "<directory of this class>/data/<file>.php" and
     * returns the value that file returns.
     *
     * @param string $file <p>File name without directory and without the ".php" extension.</p>
     *
     * @return array
     */
    private static function getData(string $file): array
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
12296
12297
    /**
     * get data from "/data/*.php"
     *
     * Same lookup as getData(), but a missing file yields false instead of a failing include.
     *
     * @param string $file <p>File name without directory and without the ".php" extension.</p>
     *
     * @return false|mixed will return false on error
     */
    private static function getDataIfExists(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        if (!\file_exists($path)) {
            return false;
        }

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        return include $path;
    }
12315
12316
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
     *
     * @return bool
     *              <p>true only if the MB_OVERLOAD_STRING constant exists and its bit is set in
     *              the 'mbstring.func_overload' setting.</p>
     */
    private static function mbstring_overloaded(): bool
    {
        /** @noinspection PhpComposerExtensionStubsInspection */
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $overloadFlags = (int) @\ini_get('mbstring.func_overload');

        return ($overloadFlags & \MB_OVERLOAD_STRING) !== 0;
    }
12333
12334
    /**
     * Returns a re-indexed copy of $strings without the unwanted entries.
     *
     * @param array $strings
     * @param bool  $removeEmptyValues <p>Drop entries that are empty after \trim().</p>
     * @param int   $removeShortValues <p>Drop entries whose \mb_strlen() is less than or equal to this value;
     *                                 null disables the check.</p>
     *
     * @return array
     */
    private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
    {
        $kept = [];

        foreach ($strings as $candidate) {
            $isTooShort = $removeShortValues !== null
                          &&
                          \mb_strlen($candidate) <= $removeShortValues;
            if ($isTooShort) {
                continue;
            }

            $isBlank = $removeEmptyValues === true
                       &&
                       \trim($candidate) === '';
            if ($isBlank) {
                continue;
            }

            $kept[] = $candidate;
        }

        return $kept;
    }
12368
12369
    /**
     * rxClass
     *
     * Builds a regular-expression fragment that matches any single character
     * of $s (plus the already prepared class content passed in $class).
     * Results are memoized per "$s . $class" combination.
     *
     * @param string $s     <p>The characters the fragment has to match.</p>
     * @param string $class <p>Content that is placed first inside the character class.</p>
     *
     * @return string
     *                <p>Either a bracketed character class, or a "(?:...|...)" group when some
     *                characters could not be put into the character class.</p>
     */
    private static function rxClass(string $s, string $class = ''): string
    {
        static $RX_CLASS_CACHE = [];

        $cacheKey = $s . $class;

        if (isset($RX_CLASS_CACHE[$cacheKey])) {
            return $RX_CLASS_CACHE[$cacheKey];
        }

        // index 0 collects the character-class content, further entries are separate alternatives
        $classArray = [$class];

        // iterate by value with its own variable: the split result is a temporary,
        // and the parameter $s must not be overwritten by the loop
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a hyphen goes to the front of the class content
                $classArray[0] = '-' . $classArray[0];
            } elseif (!isset($char[2])) {
                // at most two bytes long: quote it and add it to the class
                $classArray[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                // longer in bytes, but a single character according to self::strlen()
                $classArray[0] .= $char;
            } else {
                $classArray[] = $char;
            }
        }

        // compare against '' explicitly: a plain truthiness test would skip the class "0"
        if ($classArray[0] !== '') {
            $classArray[0] = '[' . $classArray[0] . ']';
        }

        if (\count($classArray) === 1) {
            $return = $classArray[0];
        } else {
            $return = '(?:' . \implode('|', $classArray) . ')';
        }

        $RX_CLASS_CACHE[$cacheKey] = $return;

        return $return;
    }
12417
12418
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * The input is split at $delimiter and every part is passed through
     * self::ucfirst(), except for:
     * - parts that are exactly one of the listed name particles ("von", "de", ...),
     * - parts that start with one of the listed prefixes ("mc", "d'", ...),
     * - and, only when $delimiter is "-", parts that start with one of the name particles.
     *
     * @param string $names
     * @param string $delimiter
     * @param string $encoding  <p>Passed on to self::strpos() for the prefix checks.</p>
     *
     * @return string
     */
    private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
    {
        // init
        $namesArray = \explode($delimiter, $names);

        if ($namesArray === false) {
            return '';
        }

        $specialCases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // every beginning that keeps a part untouched; for the "-" delimiter
        // the name particles count as beginnings as well
        $protectedBeginnings = $specialCases['prefixes'];
        if ($delimiter === '-') {
            foreach ($specialCases['names'] as $particle) {
                $protectedBeginnings[] = $particle;
            }
        }

        foreach ($namesArray as &$name) {
            if (\in_array($name, $specialCases['names'], true)) {
                continue;
            }

            foreach ($protectedBeginnings as $beginning) {
                if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                    // first match is enough: leave this part as it is
                    continue 2;
                }
            }

            $name = self::ucfirst($name);
        }
        // break the reference so that later use of $name cannot modify the array
        unset($name);

        return \implode($delimiter, $namesArray);
    }
12508
12509
    /**
     * Generic case sensitive transformation for collation matching.
     *
     * The string is normalized to NFD and every run of characters matching
     * "\p{Mn}" is then removed.
     *
     * @param string $str <p>The input string</p>
     *
     * @return string|null
     */
    private static function strtonatfold(string $str)
    {
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        return \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
12524
12525
    /**
     * Converts one input value into its UTF-8 byte sequence.
     *
     * The value is looked up in self::$ORD; if the result has an entry in the
     * Windows-1252 table, that entry is returned. Otherwise a two-byte
     * sequence is assembled from the value.
     *
     * @param int|string $input
     *
     * @return string
     */
    private static function to_utf8_convert_helper($input): string
    {
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];

        // found in Windows-1252 special cases
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) {
            return (string) self::$WIN1252_TO_UTF8[$ordC1];
        }

        // The bitwise operators below are intentional: with two string
        // operands PHP applies "&" and "|" byte by byte.
        // ">> 6" is the integer form of "/ 64"; the division produced a
        // fractional float that was silently truncated when used as array key.
        $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
        $cc2 = ((string) $input & "\x3F") | "\x80";

        return $cc1 . $cc2;
    }
12558
}
12559