Passed
Push — master ( 4644d4...047df5 )
by Lars
03:34
created

UTF8   F

Complexity

Total Complexity 1695

Size/Duplication

Total Lines 12551
Duplicated Lines 0 %

Test Coverage

Coverage 79.83%

Importance

Changes 0
Metric Value
eloc 4354
dl 0
loc 12551
ccs 3039
cts 3807
cp 0.7983
rs 0.8
c 0
b 0
f 0
wmc 1695

294 Methods

Rating   Name   Duplication   Size   Complexity  
A chr_to_decimal() 0 30 6
A file_has_bom() 0 8 2
A max() 0 14 3
A add_bom_to_string() 0 7 2
A parse_str() 0 16 4
A filter_input() 0 13 2
A array_change_key_case() 0 20 5
A get_unique_string() 0 15 2
A is_bom() 0 10 3
A is_hexadecimal() 0 8 2
A encode_mimeheader() 0 25 5
A count_chars() 0 11 1
A ctype_loaded() 0 3 1
A has_uppercase() 0 8 2
A max_chr_width() 0 8 2
A isBinary() 0 3 1
A ltrim() 0 19 4
A emoji_decode() 0 16 2
D is_utf8() 0 144 31
A lcword() 0 8 1
A mbstring_loaded() 0 3 1
D chr() 0 101 18
A html_escape() 0 6 1
C normalize_encoding() 0 134 14
C get_file_type() 0 89 14
A chr_to_int() 0 3 1
C is_utf16() 0 65 16
A isHtml() 0 3 1
C filter() 0 54 13
A normalize_whitespace() 0 30 6
A isBase64() 0 3 1
A is_html() 0 12 2
A decode_mimeheader() 0 15 5
A html_decode() 0 3 1
A isUtf32() 0 3 1
A chunk_split() 0 3 1
A emoji_encode() 0 16 2
A is_alpha() 0 8 2
B get_random_string() 0 53 10
A fix_utf8() 0 30 4
A first_char() 0 11 4
A isUtf8() 0 3 1
A css_stripe_media_queries() 0 6 1
A clean() 0 46 6
A is_serialized() 0 11 3
A is_uppercase() 0 8 2
A is_ascii() 0 7 2
A normalize_line_ending() 0 3 1
B range() 0 41 10
A normalize_msword() 0 43 2
A is_blank() 0 8 2
D getCharDirection() 0 105 118
A htmlspecialchars() 0 11 3
A filter_var_array() 0 9 2
A __construct() 0 2 1
A decimal_to_chr() 0 3 1
A pcre_utf8_support() 0 4 1
B between() 0 48 8
A codepoints() 0 29 4
A lowerCaseFirst() 0 8 1
A chr_map() 0 5 1
A cleanup() 0 25 2
A char_at() 0 7 2
A chars() 0 3 1
B is_binary() 0 35 9
A intlChar_loaded() 0 3 1
A lcfirst() 0 44 5
A finfo_loaded() 0 3 1
A fits_inside() 0 3 1
A is_binary_file() 0 16 3
A intl_loaded() 0 3 1
A html_stripe_empty_tags() 0 6 1
A chr_size_list() 0 17 3
F extract_text() 0 175 34
A json_loaded() 0 3 1
A isBom() 0 3 1
A int_to_chr() 0 3 1
A is_lowercase() 0 8 2
A hasBom() 0 3 1
A iconv_loaded() 0 3 1
A lcwords() 0 31 6
A isAscii() 0 3 1
A normalizeEncoding() 0 3 1
A filter_var() 0 9 2
A is_empty() 0 3 1
B html_encode() 0 42 7
A isUtf16() 0 3 1
F encode() 0 139 37
C is_utf32() 0 65 16
C ord() 0 65 16
A is_alphanumeric() 0 8 2
A json_decode() 0 14 2
A fix_simple_utf8() 0 19 4
A checkForSupport() 0 47 4
B is_json() 0 27 8
A int_to_hex() 0 7 2
A has_lowercase() 0 8 2
A json_encode() 0 10 2
A is_base64() 0 16 5
A hex_to_int() 0 14 3
A htmlentities() 0 23 3
A hex_to_chr() 0 3 1
A isJson() 0 3 1
A filter_input_array() 0 9 2
A getSupportInfo() 0 13 3
A chr_to_hex() 0 11 3
A min() 0 14 3
A collapse_whitespace() 0 8 2
C html_entity_decode() 0 86 17
A access() 0 11 4
B file_get_contents() 0 58 11
A callback() 0 3 1
A binary_to_str() 0 12 3
A bom() 0 3 1
A str_substr_after_first_separator() 0 28 6
B str_camelize() 0 70 10
A str_contains() 0 10 2
B str_to_lines() 0 29 8
A substr_in_byte() 0 18 6
A strnatcasecmp() 0 5 1
A substr_left() 0 15 4
D strlen() 0 96 19
A str_isubstr_last() 0 20 4
A str_replace_beginning() 0 21 6
A remove_left() 0 21 4
B stripos() 0 59 11
A str_offset_exists() 0 10 2
D strrchr() 0 101 20
A to_filename() 0 24 2
A str_iends_with() 0 7 3
C utf8_decode() 0 60 13
A remove_html() 0 3 1
B str_longest_common_suffix() 0 51 10
C wordwrap() 0 52 12
B ucfirst() 0 57 7
A str_pad_both() 0 7 1
A str_index_last() 0 11 1
A str_substr_last() 0 33 6
A str_limit() 0 26 6
A toUTF8() 0 3 1
A string() 0 10 1
B rxClass() 0 39 8
A str_ensure_right() 0 13 4
B str_titleize_for_humans() 0 152 5
A str_starts_with() 0 3 1
A str_humanize() 0 15 1
C substr_count_in_byte() 0 54 15
A strchr() 0 8 1
A strichr() 0 8 1
A str_index_first() 0 11 1
A strlen_in_byte() 0 12 3
A str_ireplace_ending() 0 21 6
A rtrim() 0 19 4
C str_longest_common_substring() 0 73 16
A regex_replace() 0 20 3
A titlecase() 0 24 5
A getData() 0 6 1
A str_iindex_first() 0 11 1
B strtolower() 0 54 10
B urldecode() 0 37 8
A str_isubstr_before_first_separator() 0 16 5
B strrev() 0 43 10
A replace_all() 0 11 2
D substr_replace() 0 124 27
A removeBOM() 0 3 1
A strstr_in_byte() 0 12 4
A str_matches_pattern() 0 3 1
B str_titleize() 0 55 10
A ws() 0 3 1
A str_replace_first() 0 17 2
A toLatin1() 0 3 1
A str_pad_right() 0 7 1
B ucwords() 0 48 9
A to_boolean() 0 35 5
C stristr() 0 68 15
A strncasecmp() 0 10 1
B strwidth() 0 40 8
A trim() 0 19 4
A str_upper_camelize() 0 8 1
A substr_compare() 0 33 6
C substr_count() 0 62 16
A strnatcmp() 0 3 2
D str_pad() 0 146 16
A urldecode_unicode_helper() 0 8 2
A str_ireplace() 0 18 3
A to_latin1() 0 3 1
A str_replace_ending() 0 21 6
A string_has_bom() 0 10 3
B strtr() 0 33 8
A str_contains_all() 0 23 6
A str_isubstr_after_last_separator() 0 23 5
B strspn() 0 30 10
A strcasecmp() 0 5 1
A str_transliterate() 0 3 1
B rawurldecode() 0 37 8
B str_capitalize_name_helper() 0 80 10
A utf8_encode() 0 16 3
C str_detect_encoding() 0 111 13
A spaces_to_tabs() 0 11 3
A str_istarts_with() 0 7 3
A str_replace() 0 14 1
A substr_iright() 0 15 4
A replace() 0 11 2
A to_iso8859() 0 16 4
A words_limit() 0 17 5
A strip_tags() 0 15 4
A str_isubstr_before_last_separator() 0 21 6
D str_truncate_safe() 0 78 18
A substr_right() 0 31 6
D str_split() 0 125 28
A str_ends_with_any() 0 13 4
A strrpos_in_byte() 0 12 4
F strrpos() 0 118 25
A remove_right() 0 22 4
A remove_html_breaks() 0 3 1
A showSupport() 0 8 2
A remove_invisible_characters() 0 19 3
A single_chr_html_encode() 0 15 4
A str_replace_last() 0 19 2
A str_iindex_last() 0 11 1
A str_substr_before_last_separator() 0 31 6
B strtocasefold() 0 33 7
A tabs_to_spaces() 0 11 3
B str_truncate() 0 44 7
D strripos() 0 96 19
A strpos_in_byte() 0 12 4
A str_ends_with() 0 3 1
F to_ascii() 0 149 27
A reduce_string_array() 0 26 6
B str_longest_common_prefix() 0 48 8
A mbstring_overloaded() 0 11 2
A str_pad_left() 0 7 1
A str_substr_first() 0 33 6
A remove_bom() 0 21 5
A str_repeat() 0 5 1
A strpbrk() 0 11 4
A whitespace_table() 0 3 1
A substr_count_simple() 0 32 6
A str_iends_with_any() 0 13 4
A str_substr_after_last_separator() 0 28 6
A str_isubstr_after_first_separator() 0 23 5
B str_snakeize() 0 55 6
A str_sort() 0 15 3
D to_utf8() 0 117 35
A ucword() 0 3 1
A str_underscored() 0 3 1
A str_offset_get() 0 14 4
A strip_whitespace() 0 7 2
A getDataIfExists() 0 10 2
A toAscii() 0 3 1
A str_capitalize_name() 0 8 1
B str_limit_after_word() 0 53 11
A str_upper_first() 0 8 1
A swapCase() 0 17 4
A substr_ileft() 0 15 4
A str_dasherize() 0 3 1
A str_ensure_left() 0 11 3
B urldecode_fix_win1252_chars() 0 227 1
A toIso8859() 0 3 1
A strtonatfold() 0 6 1
C strcspn() 0 51 12
A fixStrCaseHelper() 0 33 5
B str_split_pattern() 0 49 11
D strstr() 0 92 18
A str_isubstr_first() 0 25 4
A str_last_char() 0 13 4
A str_ireplace_beginning() 0 21 6
A str_substr_before_first_separator() 0 32 6
F substr() 0 143 32
A wordwrap_per_line() 0 15 3
A str_surround() 0 3 1
A strncmp() 0 19 4
A str_insert() 0 28 4
A utf8_fix_win1252_chars() 0 3 1
A replace_diamond_question_mark() 0 35 5
A to_utf8_convert_helper() 0 27 5
B str_delimit() 0 33 8
B strtoupper() 0 54 10
A str_starts_with_any() 0 17 5
B strrichr() 0 54 11
A split() 0 6 1
A str_istarts_with_any() 0 17 5
A str_contains_any() 0 23 6
A initEmojiData() 0 26 4
A remove_duplicates() 0 14 4
B str_slice() 0 33 10
F strpos() 0 131 27
A str_shuffle() 0 35 6
A strcmp() 0 5 2
A str_word_count() 0 23 5
A strripos_in_byte() 0 12 4
A str_to_binary() 0 5 1
A symfony_polyfill_used() 0 16 5
B str_to_words() 0 36 8

How to fix   Complexity   

Complex Class

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
final class UTF8
8
{
9
    // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
10
    // This regular expression is a work around for http://bugs.exim.org/1279
11
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
12
13
    /**
     * Known byte order marks mapped to their length in bytes.
     *
     * Every BOM is listed twice: once as the raw byte sequence and once in the
     * form it takes after the raw bytes were mis-read as "WINDOWS-1252"
     * (in that form one character may occupy more than one byte, hence the
     * larger lengths).
     *
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
     *
     * @var array<string, int>
     */
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        // Written as byte escapes (U+00EF U+00BB U+00BF) so the key cannot be
        // silently stripped by editors or tools that treat it as a real BOM.
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
32
33
    /**
34
     * Numeric code point => UTF-8 Character
35
     *
36
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
37
     *
38
     * @var array
39
     */
40
    private static $WHITESPACE = [
41
        // NUL Byte
42
        0 => "\x0",
43
        // Tab
44
        9 => "\x9",
45
        // New Line
46
        10 => "\xa",
47
        // Vertical Tab
48
        11 => "\xb",
49
        // Carriage Return
50
        13 => "\xd",
51
        // Ordinary Space
52
        32 => "\x20",
53
        // NO-BREAK SPACE
54
        160 => "\xc2\xa0",
55
        // OGHAM SPACE MARK
56
        5760 => "\xe1\x9a\x80",
57
        // MONGOLIAN VOWEL SEPARATOR
58
        6158 => "\xe1\xa0\x8e",
59
        // EN QUAD
60
        8192 => "\xe2\x80\x80",
61
        // EM QUAD
62
        8193 => "\xe2\x80\x81",
63
        // EN SPACE
64
        8194 => "\xe2\x80\x82",
65
        // EM SPACE
66
        8195 => "\xe2\x80\x83",
67
        // THREE-PER-EM SPACE
68
        8196 => "\xe2\x80\x84",
69
        // FOUR-PER-EM SPACE
70
        8197 => "\xe2\x80\x85",
71
        // SIX-PER-EM SPACE
72
        8198 => "\xe2\x80\x86",
73
        // FIGURE SPACE
74
        8199 => "\xe2\x80\x87",
75
        // PUNCTUATION SPACE
76
        8200 => "\xe2\x80\x88",
77
        // THIN SPACE
78
        8201 => "\xe2\x80\x89",
79
        //HAIR SPACE
80
        8202 => "\xe2\x80\x8a",
81
        // LINE SEPARATOR
82
        8232 => "\xe2\x80\xa8",
83
        // PARAGRAPH SEPARATOR
84
        8233 => "\xe2\x80\xa9",
85
        // NARROW NO-BREAK SPACE
86
        8239 => "\xe2\x80\xaf",
87
        // MEDIUM MATHEMATICAL SPACE
88
        8287 => "\xe2\x81\x9f",
89
        // IDEOGRAPHIC SPACE
90
        12288 => "\xe3\x80\x80",
91
    ];
92
93
    /**
94
     * @var array
95
     */
96
    private static $WHITESPACE_TABLE = [
97
        'SPACE'                     => "\x20",
98
        'NO-BREAK SPACE'            => "\xc2\xa0",
99
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
100
        'EN QUAD'                   => "\xe2\x80\x80",
101
        'EM QUAD'                   => "\xe2\x80\x81",
102
        'EN SPACE'                  => "\xe2\x80\x82",
103
        'EM SPACE'                  => "\xe2\x80\x83",
104
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
105
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
106
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
107
        'FIGURE SPACE'              => "\xe2\x80\x87",
108
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
109
        'THIN SPACE'                => "\xe2\x80\x89",
110
        'HAIR SPACE'                => "\xe2\x80\x8a",
111
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
112
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
113
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
114
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
115
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
116
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
117
    ];
118
119
    /**
120
     * bidirectional text chars
121
     *
122
     * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
123
     *
124
     * @var array
125
     */
126
    private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
127
        // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
128
        8234 => "\xE2\x80\xAA",
129
        // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
130
        8235 => "\xE2\x80\xAB",
131
        // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
132
        8236 => "\xE2\x80\xAC",
133
        // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
134
        8237 => "\xE2\x80\xAD",
135
        // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
136
        8238 => "\xE2\x80\xAE",
137
        // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
138
        8294 => "\xE2\x81\xA6",
139
        // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
140
        8295 => "\xE2\x81\xA7",
141
        // FIRST STRONG ISOLATE // (use -> dir = "auto")
142
        8296 => "\xE2\x81\xA8",
143
        // POP DIRECTIONAL ISOLATE
144
        8297 => "\xE2\x81\xA9",
145
    ];
146
147
    /**
148
     * @var array
149
     */
150
    private static $COMMON_CASE_FOLD = [
151
        'upper' => [
152
            'µ',
153
            'ſ',
154
            "\xCD\x85",
155
            'ς',
156
            'ẞ',
157
            "\xCF\x90",
158
            "\xCF\x91",
159
            "\xCF\x95",
160
            "\xCF\x96",
161
            "\xCF\xB0",
162
            "\xCF\xB1",
163
            "\xCF\xB5",
164
            "\xE1\xBA\x9B",
165
            "\xE1\xBE\xBE",
166
        ],
167
        'lower' => [
168
            'μ',
169
            's',
170
            'ι',
171
            'σ',
172
            'ß',
173
            'β',
174
            'θ',
175
            'φ',
176
            'π',
177
            'κ',
178
            'ρ',
179
            'ε',
180
            "\xE1\xB9\xA1",
181
            'ι',
182
        ],
183
    ];
184
185
    /**
186
     * @var array
187
     */
188
    private static $SUPPORT = [];
189
190
    /**
191
     * @var array|null
192
     */
193
    private static $BROKEN_UTF8_FIX;
194
195
    /**
196
     * @var array|null
197
     */
198
    private static $WIN1252_TO_UTF8;
199
200
    /**
201
     * @var array|null
202
     */
203
    private static $INTL_TRANSLITERATOR_LIST;
204
205
    /**
206
     * @var array|null
207
     */
208
    private static $ENCODINGS;
209
210
    /**
211
     * @var array|null
212
     */
213
    private static $ORD;
214
215
    /**
216
     * @var array|null
217
     */
218
    private static $EMOJI;
219
220
    /**
221
     * @var array|null
222
     */
223
    private static $EMOJI_VALUES_CACHE;
224
225
    /**
226
     * @var array|null
227
     */
228
    private static $EMOJI_KEYS_CACHE;
229
230
    /**
231
     * @var array|null
232
     */
233
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
234
235
    /**
236
     * @var array|null
237
     */
238
    private static $CHR;
239
240
    /**
241
     * __construct()
242
     */
243 32
    public function __construct()
244
    {
245 32
    }
246
247
    /**
     * Fetch one character by its position: a multi-byte aware counterpart of $str[1].
     *
     * @param string $str      <p>A UTF-8 string.</p>
     * @param int    $pos      <p>The position of the character to return; negative positions yield an empty string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the single (multi-byte) character, or an empty string for empty input / negative position
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        $hasUsableInput = $str !== '' && $pos >= 0;
        if (!$hasUsableInput) {
            return '';
        }

        // fast path: plain "mb_substr" for the default encoding
        return $encoding === 'UTF-8'
            ? (string) \mb_substr($str, $pos, 1)
            : (string) self::substr($str, $pos, 1, $encoding);
    }
268
269
    /**
     * Makes sure the string starts with a BOM by prepending the UTF-8 BOM when needed.
     *
     * INFO: A string for which UTF8::string_has_bom() already reports a BOM is returned unchanged.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the string, guaranteed to carry a BOM
     */
    public static function add_bom_to_string(string $str): string
    {
        return self::string_has_bom($str) === false
            ? self::bom() . $str
            : $str;
    }
286
287
    /**
     * Changes the case of all keys in an array (multi-byte aware).
     *
     * The values are copied over untouched. If two keys collapse into the same
     * key after the case change, the later entry overwrites the earlier one.
     *
     * @param array  $array    <p>The array to work on</p>
     * @param int    $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                         or <strong>CASE_LOWER</strong> (default); any other value is treated as
     *                         <strong>CASE_LOWER</strong></p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return array an array with its keys lower or uppercased
     */
    public static function array_change_key_case(array $array, int $case = \CASE_LOWER, string $encoding = 'UTF-8'): array
    {
        // unknown modes silently fall back to lower-casing
        if ($case !== \CASE_LOWER && $case !== \CASE_UPPER) {
            $case = \CASE_LOWER;
        }

        $return = [];
        // iterate by value: nothing is written back into $array, so a reference
        // would only leave a dangling alias behind
        foreach ($array as $key => $value) {
            $key = $case === \CASE_LOWER
                ? self::strtolower((string) $key, $encoding)
                : self::strtoupper((string) $key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
318
319
    /**
     * Extracts the text enclosed by the first $start delimiter (searched from
     * $offset) and the next $end delimiter after it.
     *
     * An empty string is returned when either delimiter cannot be found or
     * when nothing lies between the two delimiters.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        // fast path: native "mb_*" functions for the default encoding
        if ($encoding === 'UTF-8') {
            $startAt = \mb_strpos($str, $start, $offset);
            if ($startAt === false) {
                return '';
            }

            $contentAt = $startAt + (int) \mb_strlen($start);
            $endAt = \mb_strpos($str, $end, $contentAt);
            if ($endAt !== false && $endAt !== $contentAt) {
                return (string) \mb_substr($str, $contentAt, $endAt - $contentAt);
            }

            return '';
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $startAt = self::strpos($str, $start, $offset, $encoding);
        if ($startAt === false) {
            return '';
        }

        $contentAt = $startAt + (int) self::strlen($start, $encoding);
        $endAt = self::strpos($str, $end, $contentAt, $encoding);
        if ($endAt !== false && $endAt !== $contentAt) {
            return (string) self::substr($str, $contentAt, $endAt - $contentAt, $encoding);
        }

        return '';
    }
382
383
    /**
     * Converts a string of binary digits ("0" / "1") into the bytes it represents.
     *
     * The digits are read as one base-2 number, converted to hexadecimal and
     * packed into a byte string. Input without an element at offset 0 (for
     * example an empty string) and input that evaluates to zero both yield an
     * empty string.
     *
     * @param mixed $bin <p>A sequence of the digits 1|0.</p>
     *
     * @return string
     */
    public static function binary_to_str($bin): string
    {
        if (!isset($bin[0])) {
            return '';
        }

        $convert = \base_convert($bin, 2, 16);
        if ($convert === '0') {
            return '';
        }

        // "base_convert" drops leading zeros, so the hex string can have an odd
        // number of digits. pack('H*') would then place the last digit into the
        // HIGH nibble of the final byte ("5" => "\x50"); pad on the left so the
        // digits stay aligned to whole bytes ("05" => "\x05").
        if (\strlen($convert) % 2 !== 0) {
            $convert = '0' . $convert;
        }

        return \pack('H*', $convert);
    }
403
404
    /**
     * Provides the Byte Order Mark for UTF-8.
     *
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
     *
     * @return string the three bytes of the UTF-8 Byte Order Mark
     */
    public static function bom(): string
    {
        $utf8Bom = "\xef\xbb\xbf";

        return $utf8Bom;
    }
415
416
    /**
     * @alias of UTF8::chr_map()
     *
     * Runs the callback on every character of the string and collects the results.
     *
     * @see   UTF8::chr_map()
     *
     * @param array|string $callback
     * @param string       $str
     *
     * @return string[]
     */
    public static function callback($callback, string $str): array
    {
        $characters = self::str_split($str);

        return \array_map($callback, $characters);
    }
430
431
    /**
     * Picks the character located at $index (the first character has index 0).
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @return string the character at $index
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // the default encoding is served directly by "mb_substr"
        $character = $encoding === 'UTF-8'
            ? \mb_substr($str, $index, 1)
            : self::substr($str, $index, 1, $encoding);

        return (string) $character;
    }
448
449
    /**
     * Breaks the string apart into its individual characters.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string[] the characters as produced by UTF8::str_split()
     */
    public static function chars(string $str): array
    {
        $characters = self::str_split($str);

        return $characters;
    }
460
461
    /**
     * Detects once which UTF-8 related PHP features are available and stores
     * the findings in self::$SUPPORT.
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     *
     * @return true|null true when the detection ran during this call, null when it had already been done
     */
    public static function checkForSupport()
    {
        $doneFlag = 'already_checked_via_portable_utf8';

        // nothing to do on repeated calls
        if (isset(self::$SUPPORT[$doneFlag])) {
            return null;
        }

        self::$SUPPORT[$doneFlag] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        // the polyfill provides "mb_internal_encoding" as well, so set it there too
        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
516
517
    /**
     * Builds the character that belongs to a code point, UTF-8 encoded by default.
     *
     * INFO: opposite to UTF8::ord()
     *
     * Results are memoized per code point / encoding combination for the
     * lifetime of the process.
     *
     * @param int|string $code_point <p>The code point for which to generate a character.</p>
     * @param string     $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @return string|null multi-byte character, returns null on failure or empty input
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // memo of already generated characters
        static $generated = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $isUtf8 = $encoding === 'UTF-8';

        // these encodings do not need "mbstring" for the conversion
        $worksWithoutMbstring = $isUtf8
                                || $encoding === 'ISO-8859-1'
                                || $encoding === 'WINDOWS-1252';
        if (!$worksWithoutMbstring && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        $memoKey = $code_point . $encoding;
        if (isset($generated[$memoKey]) === true) {
            return $generated[$memoKey];
        }

        if ($code_point <= 127) {
            // use "simple"-char only until "\x80": plain table lookup
            if (self::$CHR === null) {
                self::$CHR = (array) self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];
        } elseif (self::$SUPPORT['intlChar'] === true) {
            // fallback via "IntlChar"
            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);
        } else {
            // fallback via vanilla php: assemble the UTF-8 bytes by hand
            if (self::$CHR === null) {
                self::$CHR = (array) self::getData('chr');
            }

            $cp = (int) $code_point;
            if ($cp <= 0x7F) {
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $chr = self::$CHR[$cp];
            } else {
                // value of every byte of the sequence, lead byte first
                if ($cp <= 0x7FF) {
                    $byteValues = [
                        ($cp >> 6) + 0xC0,
                        ($cp & 0x3F) + 0x80,
                    ];
                } elseif ($cp <= 0xFFFF) {
                    $byteValues = [
                        ($cp >> 12) + 0xE0,
                        (($cp >> 6) & 0x3F) + 0x80,
                        ($cp & 0x3F) + 0x80,
                    ];
                } else {
                    $byteValues = [
                        ($cp >> 18) + 0xF0,
                        (($cp >> 12) & 0x3F) + 0x80,
                        (($cp >> 6) & 0x3F) + 0x80,
                        ($cp & 0x3F) + 0x80,
                    ];
                }

                $chr = '';
                foreach ($byteValues as $byteValue) {
                    /**
                     * @psalm-suppress PossiblyNullArrayAccess
                     */
                    $chr .= self::$CHR[$byteValue];
                }
            }
        }

        // every strategy above produces UTF-8; convert only if something else was requested
        if (!$isUtf8) {
            $chr = self::encode($encoding, $chr);
        }

        return $generated[$memoKey] = $chr;
    }
629
630
    /**
     * Calls the callback once per character of the string and gathers the return values.
     *
     * @param array|string $callback <p>The callback function.</p>
     * @param string       $str      <p>UTF-8 string to run callback on.</p>
     *
     * @return string[] the outcome of callback
     */
    public static function chr_map($callback, string $str): array
    {
        $characters = self::str_split($str);

        return \array_map($callback, $characters);
    }
645
646
    /**
     * Reports, character by character, how many bytes each character of a Unicode string occupies.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @return int[] an array of byte lengths of each character
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        // default: the native byte-wise "strlen"
        $byteCounter = '\strlen';

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            $byteCounter = static function (string $data): int {
                return \mb_strlen($data, 'CP850'); // 8-BIT
            };
        }

        return \array_map($byteCounter, self::str_split($str));
    }
676
677
    /**
     * Get a decimal code representation of a specific character.
     *
     * The lead byte decides how many bytes belong to the character (1 to 4);
     * the payload bits of the lead byte and of each following byte are then
     * combined into one number. Only the bytes of the first character are read.
     *
     * Bytes that are missing (empty input or a truncated sequence) are passed
     * to UTF8::ord() as an empty string instead of reading an uninitialized
     * string offset.
     *
     * @param string $char <p>The input character.</p>
     *
     * @return int
     */
    public static function chr_to_decimal(string $char): int
    {
        // "?? ''" avoids the "Uninitialized string offset" warning on empty input
        $code = self::ord($char[0] ?? '');
        $bytes = 1;

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        for ($i = 2; $i <= $bytes; ++$i) {
            // 10xxxxxx: shift in the payload of the next byte, tolerating a truncated sequence
            $code = ($code << 6) + (self::ord($char[$i - 1] ?? '') & ~0x80);
        }

        return $code;
    }
715
716
    /**
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * @param int|string $char <p>The input character</p>
     * @param string     $pfix [optional] <p>Prefix handed to UTF8::int_to_hex().</p>
     *
     * @return string The code point encoded as U+xxxx, or an empty string for empty input
     */
    public static function chr_to_hex($char, string $pfix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        // The literal "&#0;" entity is looked up as an empty string.
        $lookup = $char === '&#0;' ? '' : (string) $char;
        $codePoint = self::ord($lookup);

        return self::int_to_hex($codePoint, $pfix);
    }
736
737
    /**
     * Alias of "UTF8::chr_to_decimal()".
     *
     * @see UTF8::chr_to_decimal()
     *
     * @param string $chr
     *
     * @return int
     */
    public static function chr_to_int(string $chr): int
    {
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
750
751
    /**
     * Splits a string into smaller chunks and joins them with the given line ending.
     *
     * The chunks come from UTF8::str_split() and are glued together with
     * implode(), so $end is placed between chunks and not after the last one.
     *
     * @param string $body     <p>The original string to be split.</p>
     * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
     *
     * @return string the chunked string
     */
    public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunklen);

        return \implode($end, $chunks);
    }
764
765
    /**
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
     *
     * The optional steps run in a fixed order: diamond question mark removal,
     * invisible character removal, whitespace normalization, MS Word
     * normalization and finally BOM removal.
     *
     * @param string $str                           <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
     * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
     *                                              whitespace.</p>
     * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
     *                                              e.g.: "…"
     *                                              => "..."</p>
     * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
     *                                              combination with
     *                                              $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
     *                                              mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove invisible
     *                                              characters e.g.: "\0"</p>
     *
     * @return string clean UTF-8 encoded string
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        $byteSequencePattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';

        // Only capture group 1 is written back, so the stray bytes matched by
        // the two other alternatives disappear from the result.
        $str = (string) \preg_replace($byteSequencePattern, '$1', $str);

        if ($replace_diamond_question_mark) {
            $str = self::replace_diamond_question_mark($str, '');
        }

        if ($remove_invisible_characters) {
            $str = self::remove_invisible_characters($str);
        }

        if ($normalize_whitespace) {
            $str = self::normalize_whitespace($str, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $str = self::normalize_msword($str);
        }

        if ($remove_bom) {
            $str = self::remove_bom($str);
        }

        return $str;
    }
832
833
    /**
     * Cleans up a string: fixes simple UTF-8 encoding errors first and then
     * runs UTF8::clean() with a fixed set of options.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function cleanup($str): string
    {
        $str = (string) $str;
        if ($str === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $fixed = self::fix_simple_utf8($str);

        // Options for clean(): strip non UTF-8 symbols (always done by clean()),
        // strip the BOM, normalize whitespace but keep non-breaking-spaces,
        // leave MS Word chars alone, strip the diamond question mark (�)
        // and strip invisible characters (e.g. "\0").
        $removeBom = true;
        $normalizeWhitespace = true;
        $normalizeMsWord = false;
        $keepNonBreakingSpace = true;
        $replaceDiamondQuestionMark = true;
        $removeInvisibleCharacters = true;

        return self::clean(
            $fixed,
            $removeBom,
            $normalizeWhitespace,
            $normalizeMsWord,
            $keepNonBreakingSpace,
            $replaceDiamondQuestionMark,
            $removeInvisibleCharacters
        );
    }
867
868
    /**
     * Accepts a string or an array of strings and returns an array of Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * @param string|string[] $arg     <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $u_style <p>If True, will return code points in U+xxxx format,
     *                                 default, code points will be returned as integers.</p>
     *
     * @return array<int|string>
     *                           The array of code points:<br>
     *                           array<int> for $u_style === false<br>
     *                           array<string> for $u_style === true<br>
     */
    public static function codepoints($arg, bool $u_style = false): array
    {
        // A plain string is split into pieces first; arrays are used as given.
        if (\is_string($arg)) {
            $arg = self::str_split($arg);
        }

        $codePoints = \array_map([self::class, 'ord'], $arg);

        if (\count($codePoints) === 0) {
            return [];
        }

        if (!$u_style) {
            return $codePoints;
        }

        return \array_map([self::class, 'int_to_hex'], $codePoints);
    }
912
913
    /**
     * Trims the string and replaces every run of whitespace characters with a
     * single space. This includes tabs and newline characters, as well as
     * multibyte whitespace such as the thin space and ideographic space.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string with a trimmed $str and condensed whitespace
     */
    public static function collapse_whitespace(string $str): string
    {
        $whitespaceRun = '[[:space:]]+';

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $collapsed = (string) \mb_ereg_replace($whitespaceRun, ' ', $str);
        } else {
            $collapsed = self::regex_replace($str, $whitespaceRun, ' ');
        }

        return \trim($collapsed);
    }
931
932
    /**
     * Returns count of characters used in a string.
     *
     * @param string $str                <p>The input string.</p>
     * @param bool   $cleanUtf8          [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool   $tryToUseMbFunction [optional] <p>Passed through to UTF8::str_split(); judging by its name,
     *                                   set it to false if you don't want to use the "mb_" functions.</p>
     *
     * @return int[] an associative array of Character as keys and
     *               their count as values
     */
    public static function count_chars(
        string $str,
        bool $cleanUtf8 = false,
        bool $tryToUseMbFunction = true
    ): array {
        // Split into pieces of length 1, then tally identical pieces.
        $characters = self::str_split($str, 1, $cleanUtf8, $tryToUseMbFunction);

        return \array_count_values($characters);
    }
956
957
    /**
     * Removes css media-queries ("@media ... { ... }" blocks) from a string.
     *
     * @param string $str
     *
     * @return string the input without the matched media-query blocks
     */
    public static function css_stripe_media_queries(string $str): string
    {
        $mediaQueryPattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $stripped = \preg_replace($mediaQueryPattern, '', $str);

        // preg_replace() yields null on a PCRE error; the cast turns that into ''.
        return (string) $stripped;
    }
972
973
    /**
     * Checks whether the "ctype" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function ctype_loaded(): bool
    {
        $isLoaded = \extension_loaded('ctype');

        return $isLoaded;
    }
983
984
    /**
     * Converts an int-value into an UTF-8 character.
     *
     * The value is wrapped into a numeric HTML entity ("&#...;") and decoded
     * through UTF8::html_entity_decode().
     *
     * @param mixed $int
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        $entity = '&#' . $int . ';';

        return self::html_entity_decode($entity, \ENT_QUOTES | \ENT_HTML5);
    }
995
996
    /**
     * Decodes a MIME header field.
     *
     * Uses iconv_mime_decode() when iconv support is flagged in self::$SUPPORT,
     * otherwise falls back to mb_decode_mimeheader().
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      A decoded MIME field on success,
     *                      or false if an error occurs during the decoding
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        $isCanonicalName = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isCanonicalName) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (self::$SUPPORT['iconv'] !== true) {
            // Fallback path: the input is passed through UTF8::encode() for
            // any non UTF-8 charset before mbstring decodes it.
            if ($encoding !== 'UTF-8') {
                $str = self::encode($encoding, $str);
            }

            return \mb_decode_mimeheader($str);
        }

        return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
    }
1022
1023
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double
     *        encoding, so you can call this function also on a UTF-8 String and you don't mess the string.
     *
     * Besides real charsets, the pseudo encodings 'JSON', 'BASE64' and 'HTML-ENTITIES' are handled
     * for both directions.
     *
     * @param string $toEncoding             <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                    <p>The input string</p>
     * @param bool   $autodetectFromEncoding [optional] <p>If true, the current encoding of the string is always
     *                                       detected and a detected value replaces $fromEncoding; if nothing can
     *                                       be detected the string is passed to UTF8::to_utf8().<br>
     *                                       If false, detection only runs when $fromEncoding is empty.</p>
     * @param string $fromEncoding           [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                       A empty string will trigger the autodetect anyway.</p>
     *
     * @throws \InvalidArgumentException if $toEncoding is 'JSON' and the string can not be json-encoded
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $toEncoding,
        string $str,
        bool $autodetectFromEncoding = true,
        string $fromEncoding = ''
    ): string {
        if ($str === '' || $toEncoding === '') {
            return $str;
        }

        // Bring both encoding names into their normalized spelling.
        if ($toEncoding !== 'UTF-8' && $toEncoding !== 'CP850') {
            $toEncoding = self::normalize_encoding($toEncoding, 'UTF-8');
        }
        if ($fromEncoding && $fromEncoding !== 'UTF-8' && $fromEncoding !== 'CP850') {
            $fromEncoding = self::normalize_encoding($fromEncoding, null);
        }

        // Nothing to do when source and target are explicitly the same.
        if ($fromEncoding && $fromEncoding === $toEncoding) {
            return $str;
        }

        // Pseudo encodings: each target returns right away, each source is
        // decoded first and then treated like an unknown encoding. The order
        // of these checks is significant.
        if ($toEncoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }
        if ($fromEncoding === 'JSON') {
            $str = self::json_decode($str);
            $fromEncoding = '';
        }

        if ($toEncoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($fromEncoding === 'BASE64') {
            $str = \base64_decode($str, true);
            $fromEncoding = '';
        }

        if ($toEncoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true, 'UTF-8');
        }
        if ($fromEncoding === 'HTML-ENTITIES') {
            $str = self::html_decode($str, \ENT_COMPAT, 'UTF-8');
            $fromEncoding = '';
        }

        $detected = false;
        if ($autodetectFromEncoding || !$fromEncoding) {
            $detected = self::str_detect_encoding($str);
        }

        if ($detected !== false) {
            $fromEncoding = $detected;
        } elseif ($autodetectFromEncoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (!$fromEncoding || $fromEncoding === $toEncoding) {
            return $str;
        }

        // Shortcuts handled by the dedicated helpers of this class.
        if (
            $toEncoding === 'UTF-8'
            &&
            \in_array($fromEncoding, ['WINDOWS-1252', 'ISO-8859-1'], true)
        ) {
            return self::to_utf8($str);
        }

        if (
            $toEncoding === 'ISO-8859-1'
            &&
            \in_array($fromEncoding, ['WINDOWS-1252', 'UTF-8'], true)
        ) {
            return self::to_iso8859($str);
        }

        if (
            self::$SUPPORT['mbstring'] === false
            &&
            !\in_array($toEncoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
        ) {
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // warning: do not use the symfony polyfill here
            $converted = \mb_convert_encoding($str, $toEncoding, $fromEncoding);
            if ($converted) {
                return $converted;
            }
        }

        // Last resort: iconv; the input is returned untouched if that fails too.
        $viaIconv = \iconv($fromEncoding, $toEncoding, $str);

        return $viaIconv !== false ? $viaIconv : $str;
    }
1181
1182
    /**
     * Encodes a string as MIME header field via iconv_mime_encode(), using an empty field name.
     *
     * @param string $str
     * @param string $fromCharset      [optional] <p>Set the input charset.</p>
     * @param string $toCharset        [optional] <p>Set the output charset.</p>
     * @param string $transferEncoding [optional] <p>Set the transfer encoding.</p>
     * @param string $linefeed         [optional] <p>Set the used linefeed.</p>
     * @param int    $indent           [optional] <p>Set the max length indent.</p>
     *
     * @return false|string
     *                      An encoded MIME field on success,
     *                      or false if an error occurs during the encoding
     */
    public static function encode_mimeheader(
        $str,
        $fromCharset = 'UTF-8',
        $toCharset = 'UTF-8',
        $transferEncoding = 'Q',
        $linefeed = "\r\n",
        $indent = 76
    ) {
        if ($fromCharset !== 'UTF-8' && $fromCharset !== 'CP850') {
            $fromCharset = self::normalize_encoding($fromCharset, 'UTF-8');
        }

        if ($toCharset !== 'UTF-8' && $toCharset !== 'CP850') {
            $toCharset = self::normalize_encoding($toCharset, 'UTF-8');
        }

        // Preferences understood by iconv_mime_encode().
        $preferences = [
            'scheme'           => $transferEncoding,
            'line-length'      => $indent,
            'input-charset'    => $fromCharset,
            'output-charset'   => $toCharset,
            'line-break-chars' => $linefeed,
        ];

        return \iconv_mime_encode('', $str, $preferences);
    }
1222
1223
    /**
     * Create an extract from a sentence; if the search-string is found, the extract is built around it.
     *
     * Cut positions are looked up as the position of a space or a period. They are
     * combined with min() / max() and cast to int, so a lookup that returns false
     * can end up as position 0 - this is long-standing behaviour and kept as is.
     *
     * @param string   $str                    <p>The input string.</p>
     * @param string   $search                 <p>The searched string.</p>
     * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
     * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacerForSkippedText = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2, 0);
        }

        // For UTF-8 the native "mb_" functions are called directly, every
        // other encoding goes through the wrappers of this class.
        $isUtf8 = $encoding === 'UTF-8';

        // Length of a text as int.
        $lengthOf = static function ($text) use ($isUtf8, $encoding) {
            return (int) ($isUtf8 ? \mb_strlen($text) : self::strlen($text, $encoding));
        };

        // Part of $str.
        $slice = static function ($start, $count) use ($str, $isUtf8, $encoding) {
            return $isUtf8
                ? \mb_substr($str, $start, $count)
                : self::substr($str, $start, $count, $encoding);
        };

        // min() of the next space / next period position from $offset on, as int.
        $nextBoundary = static function ($offset) use ($str, $isUtf8, $encoding) {
            if ($isUtf8) {
                return (int) \min(
                    \mb_strpos($str, ' ', $offset),
                    \mb_strpos($str, '.', $offset)
                );
            }

            return (int) \min(
                self::strpos($str, ' ', $offset, $encoding),
                self::strpos($str, '.', $offset, $encoding)
            );
        };

        // --- no search term: keep the beginning of the string ---
        if ($search === '') {
            $end = 0;
            if ($length > 0) {
                $stringLength = $lengthOf($str);
                $end = $length - 1;
                if ($end > $stringLength) {
                    $end = $stringLength;
                }
            }

            $pos = $nextBoundary($end);
            if (!$pos) {
                return $str;
            }

            $strSub = $slice(0, $pos);
            if ($strSub === false) {
                return '';
            }

            return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
        }

        // --- search term given: locate it and work out the window ---
        if ($isUtf8) {
            $wordPos = (int) \mb_stripos($str, $search);
        } else {
            $wordPos = (int) self::stripos($str, $search, 0, $encoding);
        }
        $halfSide = (int) ($wordPos - $length / 2 + $lengthOf($search) / 2);

        $pos_start = 0;
        if ($halfSide > 0) {
            $halfText = $slice(0, $halfSide);
            if ($halfText !== false) {
                if ($isUtf8) {
                    $pos_start = (int) \max(
                        \mb_strrpos($halfText, ' '),
                        \mb_strrpos($halfText, '.')
                    );
                } else {
                    $pos_start = (int) \max(
                        self::strrpos($halfText, ' ', 0, $encoding),
                        self::strrpos($halfText, '.', 0, $encoding)
                    );
                }
            }
        }

        $fullLength = (int) self::strlen($str, $encoding);

        if ($wordPos && $halfSide > 0) {
            // The window starts inside the string.
            $offset = $pos_start + $length - 1;
            if ($offset > $fullLength) {
                $offset = $fullLength;
            }

            $pos_end = $nextBoundary($offset) - $pos_start;

            if ($pos_end <= 0) {
                // No usable end position: take everything from the start position on.
                $strSub = $slice($pos_start, $lengthOf($str));
                if ($strSub === false) {
                    return '';
                }

                return $replacerForSkippedText . \ltrim($strSub, $trimChars);
            }

            $strSub = $slice($pos_start, $pos_end);
            if ($strSub === false) {
                return '';
            }

            return $replacerForSkippedText . \trim($strSub, $trimChars) . $replacerForSkippedText;
        }

        // The window starts at the beginning of the string.
        $offset = $length - 1;
        if ($offset > $fullLength) {
            $offset = $fullLength;
        }

        $pos_end = $nextBoundary($offset);
        if (!$pos_end) {
            return $str;
        }

        $strSub = $slice(0, $pos_end);
        if ($strSub === false) {
            return '';
        }

        return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
    }
1410
1411
    /**
     * Reads entire file into a string.
     *
     * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read. It is passed through
     *                                        filter_var(..., FILTER_SANITIZE_STRING) before use.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Handed to the native file_get_contents() to trigger
     *                                        the include path search.
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with
     *                                        stream_context_create. If you don't need to use a
     *                                        custom context, you can skip this parameter by null.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts.
     *                                        </p>
     * @param int|null      $maxLength        [optional] <p>
     *                                        Maximum length of data read. The default is to read until end
     *                                        of file is reached.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout. It is only applied (as
     *                                        "http" context option) when no $context is given.</p>
     * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Binary files
     *                                        like images or pdf will not be converted.</p>
     * @param string        $fromEncoding     [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        A empty string will trigger the autodetect anyway.</p>
     *
     * @return false|string the function returns the read data as string or <b>false</b> on failure
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $maxLength = null,
        int $timeout = 10,
        bool $convertToUtf8 = true,
        string $fromEncoding = ''
    ) {
        $sanitizedName = \filter_var($filename, \FILTER_SANITIZE_STRING);
        if ($sanitizedName === false) {
            return false;
        }

        // Build a default context carrying the timeout, unless the caller brought one.
        if ($timeout && $context === null) {
            $httpOptions = ['timeout' => $timeout];
            $context = \stream_context_create(['http' => $httpOptions]);
        }

        $offset = $offset ?? 0;

        $data = \is_int($maxLength)
            ? \file_get_contents($sanitizedName, $use_include_path, $context, $offset, $maxLength)
            : \file_get_contents($sanitizedName, $use_include_path, $context, $offset);

        // return false on error
        if ($data === false) {
            return false;
        }

        if (!$convertToUtf8) {
            return $data;
        }

        // Data that is binary and neither UTF16 nor UTF32 is left untouched,
        // everything else is converted and cleaned.
        $shouldConvert = self::is_binary($data, true) !== true
                         ||
                         self::is_utf16($data, false) !== false
                         ||
                         self::is_utf32($data, false) !== false;

        if ($shouldConvert) {
            $data = self::encode('UTF-8', $data, false, $fromEncoding);
            $data = self::cleanup($data);
        }

        return $data;
    }
1508
1509
    /**
     * Tells whether the content of a file begins with a BOM (Byte Order Mark).
     *
     * The file is read with the native file_get_contents() and the result is
     * handed to UTF8::string_has_bom().
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
     */
    public static function file_has_bom(string $file_path): bool
    {
        $contents = \file_get_contents($file_path);

        if ($contents !== false) {
            return self::string_has_bom($contents);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1528
1529
    /**
     * Normalizes a value to the requested Unicode normalization form (NFC by default).
     *
     * Arrays and objects are walked recursively; every other non-string value is
     * returned unchanged. Strings containing "\r" get their line endings normalized
     * first, and a non-ASCII string that cannot be normalized is re-encoded via
     * UTF8::encode('UTF-8', ...).
     *
     * @param mixed  $var
     * @param int    $normalization_form
     * @param string $leading_combining  <p>Prefix that is put in front of a string starting with a combining mark.</p>
     *
     * @return mixed
     */
    public static function filter($var, int $normalization_form = \Normalizer::NFC, string $leading_combining = '◌')
    {
        $kind = \gettype($var);

        // containers: filter every element / property in place
        if ($kind === 'array' || $kind === 'object') {
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            unset($item);

            return $var;
        }

        if ($kind !== 'string') {
            return $var;
        }

        if (\strpos($var, "\r") !== false) {
            // Workaround https://bugs.php.net/65732
            $var = self::normalize_line_ending($var);
        }

        $isAscii = self::is_ascii($var);
        if ($isAscii !== false) {
            // pure ASCII needs no normalization
            return $var;
        }

        if (\Normalizer::isNormalized($var, $normalization_form)) {
            $hasNormalForm = true;
        } else {
            $normalized = \Normalizer::normalize($var, $normalization_form);
            $hasNormalForm = isset($normalized[0]);

            if ($hasNormalForm) {
                $var = $normalized;
            } else {
                // normalization gave nothing usable: fall back to a re-encode
                $var = self::encode('UTF-8', $var, true);
            }
        }

        if (
            $var[0] >= "\x80"
            &&
            $hasNormalForm
            &&
            isset($leading_combining[0])
            &&
            \preg_match('/^\p{Mn}/u', $var)
        ) {
            // Prevent leading combining chars
            // for NFC-safe concatenations.
            $var = $leading_combining . $var;
        }

        return $var;
    }
1593
1594
    /**
     * "filter_input()"-wrapper that passes the result through UTF8::filter().
     *
     * Gets a specific external variable by name and optionally filters it.
     *
     * @see  http://php.net/manual/en/function.filter-input.php
     *
     * @param int    $type          <p>
     *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                              <b>INPUT_ENV</b>.
     *                              </p>
     * @param string $variable_name <p>
     *                              Name of a variable to get.
     *                              </p>
     * @param int    $filter        [optional] <p>
     *                              The ID of the filter to apply. The
     *                              manual page lists the available filters.
     *                              </p>
     * @param mixed  $options       [optional] <p>
     *                              Associative array of options or bitwise disjunction of flags. If filter
     *                              accepts options, flags can be provided in "flags" field of array.
     *                              Only forwarded to the native function when all four arguments are given.
     *                              </p>
     *
     * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     */
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // Only hand $options to the native function if the caller really passed it.
        $value = \func_num_args() < 4
            ? \filter_input($type, $variable_name, $filter)
            : \filter_input($type, $variable_name, $filter, $options);

        return self::filter($value);
    }
1636
1637
    /**
     * "filter_input_array()"-wrapper that passes the result through UTF8::filter().
     *
     * Gets external variables and optionally filters them.
     *
     * @see  http://php.net/manual/en/function.filter-input-array.php
     *
     * @param int   $type       <p>
     *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                          <b>INPUT_ENV</b>.
     *                          </p>
     * @param mixed $definition [optional] <p>
     *                          An array defining the arguments. A valid key is a string
     *                          containing a variable name and a valid value is either a filter type, or an array
     *                          optionally specifying the filter, flags and options. If the value is an
     *                          array, valid keys are "filter" which specifies the filter type,
     *                          "flags" which specifies any flags that apply to the filter,
     *                          and "options" which specifies any options that apply to the filter.
     *                          </p>
     *                          <p>
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
     *                          input array are filtered by this filter.
     *                          </p>
     * @param bool  $add_empty  [optional] <p>
     *                          Add missing keys as <b>NULL</b> to the return value.
     *                          </p>
     *
     * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
     *               is not set and <b>NULL</b> if the filter fails.
     */
    public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
    {
        // With a single argument the native defaults are used as-is.
        $values = \func_num_args() < 2
            ? \filter_input_array($type)
            : \filter_input_array($type, $definition, $add_empty);

        return self::filter($values);
    }
1682
1683
    /**
     * "filter_var()"-wrapper that passes the result through UTF8::filter().
     *
     * Filters a variable with a specified filter.
     *
     * @see  http://php.net/manual/en/function.filter-var.php
     *
     * @param mixed $variable <p>
     *                        Value to filter.
     *                        </p>
     * @param int   $filter   [optional] <p>
     *                        The ID of the filter to apply. The
     *                        manual page lists the available filters.
     *                        </p>
     * @param mixed $options  [optional] <p>
     *                        Associative array of options or bitwise disjunction of flags. If filter
     *                        accepts options, flags can be provided in "flags" field of array. For
     *                        the "callback" filter, callable type should be passed. The
     *                        callback must accept one argument, the value to be filtered, and return
     *                        the value after filtering/sanitizing it.
     *                        </p>
     *                        <p>
     *                        <code>
     *                        // for filters that accept options, use this format
     *                        $options = array(
     *                            'options' => array(
     *                                'default' => 3, // value to return if the filter fails
     *                                // other options here
     *                                'min_range' => 0
     *                            ),
     *                            'flags' => FILTER_FLAG_ALLOW_OCTAL,
     *                        );
     *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
     *
     *                        // for filter that only accept flags, you can pass them directly
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
     *
     *                        // for filter that only accept flags, you can also pass as an array
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
     *                            array('flags' => FILTER_NULL_ON_FAILURE));
     *
     *                        // callback validate filter
     *                        function foo($value)
     *                        {
     *                            // Expected format: Surname, GivenNames
     *                            if (strpos($value, ", ") === false) return false;
     *                            list($surname, $givennames) = explode(", ", $value, 2);
     *                            $empty = (empty($surname) || empty($givennames));
     *                            $notstrings = (!is_string($surname) || !is_string($givennames));
     *                            if ($empty || $notstrings) {
     *                                return false;
     *                            } else {
     *                                return $value;
     *                            }
     *                        }
     *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
     *                        </code>
     *                        </p>
     *
     * @return mixed the filtered data, or <b>FALSE</b> if the filter fails
     */
    public static function filter_var($variable, int $filter = \FILTER_DEFAULT, $options = null)
    {
        // Only hand $options to the native function if the caller really passed it.
        $filtered = \func_num_args() < 3
            ? \filter_var($variable, $filter)
            : \filter_var($variable, $filter, $options);

        return self::filter($filtered);
    }
1751
1752
    /**
     * "filter_var_array()"-wrapper that passes the result through UTF8::filter().
     *
     * Gets multiple variables and optionally filters them.
     *
     * @see  http://php.net/manual/en/function.filter-var-array.php
     *
     * @param array $data       <p>
     *                          An array with string keys containing the data to filter.
     *                          </p>
     * @param mixed $definition [optional] <p>
     *                          An array defining the arguments. A valid key is a string
     *                          containing a variable name and a valid value is either a
     *                          filter type, or an array optionally specifying the filter, flags and options.
     *                          If the value is an array, valid keys are "filter"
     *                          which specifies the filter type,
     *                          "flags" which specifies any flags that apply to the filter,
     *                          and "options" which specifies any options that apply to the filter.
     *                          </p>
     *                          <p>
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
     *                          input array are filtered by this filter.
     *                          </p>
     * @param bool  $add_empty  [optional] <p>
     *                          Add missing keys as <b>NULL</b> to the return value.
     *                          </p>
     *
     * @return mixed an array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set
     */
    public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
    {
        // With a single argument the native defaults are used as-is.
        $values = \func_num_args() < 2
            ? \filter_var_array($data)
            : \filter_var_array($data, $definition, $add_empty);

        return self::filter($values);
    }
1795
1796
    /**
     * Reports whether the "finfo" class exists in the running PHP.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function finfo_loaded(): bool
    {
        $available = \class_exists('finfo');

        return $available;
    }
1806
1807
    /**
     * Returns the leading $n characters of the string.
     *
     * An empty input or a non-positive $n yields an empty string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        if ($n <= 0 || $str === '') {
            return '';
        }

        // UTF-8 goes straight to mbstring, every other charset through UTF8::substr()
        $prefix = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $prefix;
    }
1828
1829
    /**
     * Checks that the string is not longer than the given number of characters.
     *
     * The length is taken from UTF8::strlen().
     *
     * @param string $str      the original string to be checked
     * @param int    $box_size the size in number of chars to be checked against string
     *
     * @return bool true if string is less than or equal to $box_size, false otherwise
     */
    public static function fits_inside(string $str, int $box_size): bool
    {
        $charCount = self::strlen($str);

        return $charCount <= $box_size;
    }
1841
1842
    /**
     * Try to fix simple broken UTF-8 strings.
     *
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
     *
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * The replacement table comes from the "utf8_fix" data set; it is loaded once and
     * its keys/values are kept in static caches for later calls.
     *
     * @param string $str <p>The input string</p>
     *
     * @return string
     */
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        static $SEARCH_CACHE = null;
        static $REPLACE_CACHE = null;

        if ($SEARCH_CACHE === null) {
            // first call: make sure the lookup table is loaded ...
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            // ... and split it into the two lists "str_replace()" expects
            $SEARCH_CACHE = \array_keys(self::$BROKEN_UTF8_FIX);
            $REPLACE_CACHE = \array_values(self::$BROKEN_UTF8_FIX);
        }

        return \str_replace($SEARCH_CACHE, $REPLACE_CACHE, $str);
    }
1875
1876
    /**
     * Fix a double (or multiple) encoded UTF8 string.
     *
     * The string is decoded and re-encoded repeatedly until a pass no longer changes it.
     * Arrays are processed element by element (recursively), keeping their keys.
     *
     * @param string|string[] $str you can use a string or an array of strings
     *
     * @return string|string[]
     *                         Will return the fixed input-"array" or
     *                         the fixed input-"string"
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function fix_utf8($str)
    {
        if (\is_array($str) === true) {
            foreach ($str as $key => $item) {
                $str[$key] = self::fix_utf8($item);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $current = (string) $str;
        $previous = '';

        // keep going until one decode/encode round-trip leaves the string untouched
        while ($previous !== $current) {
            $previous = $current;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $decoded = self::utf8_decode($current, true);
            $current = self::to_utf8($decoded);
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }
1918
1919
    /**
     * Get the text direction of a specific character.
     *
     * If "IntlChar" support is flagged as available, its direction class decides first;
     * when that class is in neither list below, or without "IntlChar", the code point
     * is checked against hard-coded right-to-left ranges.
     *
     * @param string $char
     *
     * @return string 'RTL' or 'LTR'
     */
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $directionClass = \IntlChar::charDirection($char);

            // numeric direction classes from the "IntlChar"-Class
            $ltrClasses = [0, 11, 12, 20];
            $rtlClasses = [1, 13, 14, 15, 21];

            if (\in_array($directionClass, $ltrClasses, true)) {
                return 'LTR';
            }

            if (\in_array($directionClass, $rtlClasses, true)) {
                return 'RTL';
            }
        }

        $c = static::chr_to_decimal($char);

        // everything outside of this span is treated as left-to-right
        if (!($c >= 0x5be && $c <= 0x10b7f)) {
            return 'LTR';
        }

        // Each table entry is either one code point (strict comparison)
        // or an inclusive [first, last] range.
        if ($c <= 0x85e) {
            $rtlSpans = [
                0x5be,
                0x5c0,
                0x5c3,
                0x5c6,
                [0x5d0, 0x5ea],
                [0x5f0, 0x5f4],
                0x608,
                0x60b,
                0x60d,
                0x61b,
                [0x61e, 0x64a],
                [0x66d, 0x66f],
                [0x671, 0x6d5],
                [0x6e5, 0x6e6],
                [0x6ee, 0x6ef],
                [0x6fa, 0x70d],
                0x710,
                [0x712, 0x72f],
                [0x74d, 0x7a5],
                0x7b1,
                [0x7c0, 0x7ea],
                [0x7f4, 0x7f5],
                0x7fa,
                [0x800, 0x815],
                0x81a,
                0x824,
                0x828,
                [0x830, 0x83e],
                [0x840, 0x858],
                0x85e,
            ];
        } elseif ($c === 0x200f) {
            return 'RTL';
        } elseif ($c >= 0xfb1d) {
            $rtlSpans = [
                0xfb1d,
                [0xfb1f, 0xfb28],
                [0xfb2a, 0xfb36],
                [0xfb38, 0xfb3c],
                0xfb3e,
                [0xfb40, 0xfb41],
                [0xfb43, 0xfb44],
                [0xfb46, 0xfbc1],
                [0xfbd3, 0xfd3d],
                [0xfd50, 0xfd8f],
                [0xfd92, 0xfdc7],
                [0xfdf0, 0xfdfc],
                [0xfe70, 0xfe74],
                [0xfe76, 0xfefc],
                [0x10800, 0x10805],
                0x10808,
                [0x1080a, 0x10835],
                [0x10837, 0x10838],
                0x1083c,
                [0x1083f, 0x10855],
                [0x10857, 0x1085f],
                [0x10900, 0x1091b],
                [0x10920, 0x10939],
                0x1093f,
                0x10a00,
                [0x10a10, 0x10a13],
                [0x10a15, 0x10a17],
                [0x10a19, 0x10a33],
                [0x10a40, 0x10a47],
                [0x10a50, 0x10a58],
                [0x10a60, 0x10a7f],
                [0x10b00, 0x10b35],
                [0x10b40, 0x10b55],
                [0x10b58, 0x10b72],
                [0x10b78, 0x10b7f],
            ];
        } else {
            // between the two tables only 0x200f counts as right-to-left
            return 'LTR';
        }

        foreach ($rtlSpans as $span) {
            if (\is_array($span)) {
                if ($c >= $span[0] && $c <= $span[1]) {
                    return 'RTL';
                }
            } elseif ($c === $span) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2032
2033
    /**
     * Check for php-support.
     *
     * When a key is requested, the transliterator list is loaded on demand and
     * exposed under the 'intl__transliterator_list_ids' key before the lookup.
     *
     * @param string|null $key
     *
     * @return mixed
     *               Return the full support-"array", if $key === null<br>
     *               return bool-value, if $key is used and available<br>
     *               otherwise return <strong>null</strong>
     */
    public static function getSupportInfo(string $key = null)
    {
        if ($key === null) {
            return self::$SUPPORT;
        }

        // lazy-load the list of transliterator ids
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        // compatibility fix for old versions
        self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

        if (isset(self::$SUPPORT[$key])) {
            return self::$SUPPORT[$key];
        }

        return null;
    }
2057
2058
    /**
     * Guess the file type from the first two bytes ("magic number") of the given data.
     *
     * Warning: this method only works for some file-types (png bmp gif jpg rar zip midi exe pdf)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * The two leading bytes are compared as raw bytes. (Building a number by gluing
     * the decimal byte values together is ambiguous: the bytes 0x07 0xAD give
     * "7" . "173", the same digits as "GI" => "71" . "73".)
     *
     * @param string $str      <p>The (binary) data to inspect.</p>
     * @param array  $fallback with this keys: 'ext', 'mime', 'type'
     *
     * @return array
     *               with this keys: 'ext', 'mime', 'type'; $fallback is returned if the
     *               data is shorter than two bytes or the signature is unknown
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        // a signature needs at least two bytes (this also covers the empty string)
        if (\strlen($str) < 2) {
            return $fallback;
        }

        // two-byte signature => [extension, mime-type]
        $signatures = [
            '%P'       => ['pdf', 'application/pdf'],
            'MZ'       => ['exe', 'application/octet-stream'],
            'MT'       => ['midi', 'audio/x-midi'],
            'PK'       => ['zip', 'application/zip'],
            'Ra'       => ['rar', 'application/rar'],
            "\xFF\xD8" => ['jpg', 'image/jpeg'],
            'GI'       => ['gif', 'image/gif'],
            'BM'       => ['bmp', 'image/bmp'],
            "\x89P"    => ['png', 'image/png'],
        ];

        $magic = \substr($str, 0, 2);
        if (!isset($signatures[$magic])) {
            return $fallback;
        }

        return [
            'ext'  => $signatures[$magic][0],
            'mime' => $signatures[$magic][1],
            // every known signature belongs to a binary format
            'type' => 'binary',
        ];
    }
2159
2160
    /**
     * Builds a random string out of the given pool of characters.
     *
     * Each character is picked with "random_int()"; if that throws an exception,
     * "mt_rand()" is used instead. An empty pool results in an empty string.
     *
     * @param int    $length        <p>Length of the random string.</p>
     * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
    {
        // The fast path is chosen by the encoding exactly as passed in;
        // other values are normalized and go through the UTF8::* helpers.
        $useMbstring = $encoding === 'UTF-8';

        if ($useMbstring) {
            $poolSize = (int) \mb_strlen($possibleChars);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $poolSize = (int) self::strlen($possibleChars, $encoding);
        }

        if ($poolSize === 0) {
            return '';
        }

        $result = '';
        $picked = 0;

        //
        // add random chars
        //

        while ($picked < $length) {
            try {
                $index = \random_int(0, $poolSize - 1);
            } catch (\Exception $e) {
                /** @noinspection RandomApiMigrationInspection */
                $index = \mt_rand(0, $poolSize - 1);
            }

            $char = $useMbstring
                ? \mb_substr($possibleChars, $index, 1)
                : self::substr($possibleChars, $index, 1, $encoding);

            // a failed extraction does not count, another pick is made
            if ($char !== false) {
                $result .= $char;
                ++$picked;
            }
        }

        return $result;
    }
2221
2222
    /**
     * Creates a unique string from a random number, the session-id, the remote / server
     * address (if set in $_SERVER) and the optional extra entropy, fed into "uniqid()".
     *
     * @param int|string $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     */
    public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
    {
        $seedParts = [
            \random_int(0, \mt_getrandmax()),
            \session_id(),
            $_SERVER['REMOTE_ADDR'] ?? '',
            $_SERVER['SERVER_ADDR'] ?? '',
            $entropyExtra,
        ];
        $seed = \implode('', $seedParts);

        $unique = \uniqid($seed, true);

        if (!$md5) {
            return $unique;
        }

        return \md5($unique . $seed);
    }
2244
2245
    /**
     * alias for "UTF8::string_has_bom()"
     *
     * Kept for backward compatibility only; the call is forwarded unchanged.
     *
     * @see        UTF8::string_has_bom()
     *
     * @param string $str
     *
     * @return bool
     *
     * @deprecated <p>use "UTF8::string_has_bom()"</p>
     */
    public static function hasBom(string $str): bool
    {
        $hasBom = self::string_has_bom($str);

        return $hasBom;
    }
2260
2261
    /**
     * Checks if the string contains at least one lower case character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if a lower case character was found, <strong>false</strong> otherwise
     */
    public static function has_lowercase(string $str): bool
    {
        $pattern = '.*[[:lower:]]';

        // without mbstring we need to use our own pattern matching
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2277
2278
    /**
     * Checks if the string contains at least one upper case character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if an upper case character was found, <strong>false</strong> otherwise
     */
    public static function has_uppercase(string $str): bool
    {
        $pattern = '.*[[:upper:]]';

        // without mbstring we need to use our own pattern matching
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2294
2295
    /**
     * Converts a hexadecimal value into a character: the value is parsed via
     * "hexdec()" and then passed to "UTF8::decimal_to_chr()".
     *
     * @param string $hexdec <p>The hexadecimal value.</p>
     *
     * @return false|string one single UTF-8 character
     */
    public static function hex_to_chr(string $hexdec)
    {
        $codePoint = \hexdec($hexdec);

        return self::decimal_to_chr($codePoint);
    }
2306
2307
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * Accepted input: 4 to 6 hexadecimal digits, optionally prefixed
     * with "U+" or with a backslash followed by "u".
     *
     * @param string $hexDec <p>The hexadecimal code point representation.</p>
     *
     * @return false|int the code point, or false on failure
     */
    public static function hex_to_int($hexDec)
    {
        // init
        $hexDec = (string) $hexDec;

        if ($hexDec === '') {
            return false;
        }

        // only real hex-digits are allowed here, otherwise "intval()" would
        // silently stop at the first invalid char (e.g. "12gh" => 18, "zzzz" => 0)
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexDec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2331
2332
    /**
     * Alias for "UTF8::html_entity_decode()": all arguments are forwarded as they are.
     *
     * @see UTF8::html_entity_decode()
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $flags    [optional] <p>The flags for "UTF8::html_entity_decode()".</p>
     * @param string $encoding [optional] <p>The encoding for "UTF8::html_entity_decode()".</p>
     *
     * @return string the result of "UTF8::html_entity_decode()"
     */
    public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2347
2348
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // with "$keepAsciiChars" we start above the ASCII range
            $startCode = 0x00;
            if ($keepAsciiChars === true) {
                $startCode = 0x80;
            }

            // convmap: [start code point, end code point, offset, mask]
            //
            // INFO: the array must contain a multiple of four values,
            //       otherwise PHP 8 throws a "ValueError"
            $convmap = [$startCode, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                // no encoding argument for UTF-8, same call as before
                return \mb_encode_numericentity($str, $convmap);
            }

            return \mb_encode_numericentity($str, $convmap, $encoding);
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keepAsciiChars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keepAsciiChars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2404
2405
    /**
2406
     * UTF-8 version of html_entity_decode()
2407
     *
2408
     * The reason we are not using html_entity_decode() by itself is because
2409
     * while it is not technically correct to leave out the semicolon
2410
     * at the end of an entity most browsers will still interpret the entity
2411
     * correctly. html_entity_decode() does not convert entities without
2412
     * semicolons, so we are left with our own little solution here. Bummer.
2413
     *
2414
     * Convert all HTML entities to their applicable characters
2415
     *
2416
     * INFO: opposite to UTF8::html_encode()
2417
     *
2418
     * @see http://php.net/manual/en/function.html-entity-decode.php
2419
     *
2420
     * @param string $str      <p>
2421
     *                         The input string.
2422
     *                         </p>
2423
     * @param int    $flags    [optional] <p>
2424
     *                         A bitmask of one or more of the following flags, which specify how to handle quotes
2425
     *                         and which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2426
     *                         <table>
2427
     *                         Available <i>flags</i> constants
2428
     *                         <tr valign="top">
2429
     *                         <td>Constant Name</td>
2430
     *                         <td>Description</td>
2431
     *                         </tr>
2432
     *                         <tr valign="top">
2433
     *                         <td><b>ENT_COMPAT</b></td>
2434
     *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2435
     *                         </tr>
2436
     *                         <tr valign="top">
2437
     *                         <td><b>ENT_QUOTES</b></td>
2438
     *                         <td>Will convert both double and single quotes.</td>
2439
     *                         </tr>
2440
     *                         <tr valign="top">
2441
     *                         <td><b>ENT_NOQUOTES</b></td>
2442
     *                         <td>Will leave both double and single quotes unconverted.</td>
2443
     *                         </tr>
2444
     *                         <tr valign="top">
2445
     *                         <td><b>ENT_HTML401</b></td>
2446
     *                         <td>
2447
     *                         Handle code as HTML 4.01.
2448
     *                         </td>
2449
     *                         </tr>
2450
     *                         <tr valign="top">
2451
     *                         <td><b>ENT_XML1</b></td>
2452
     *                         <td>
2453
     *                         Handle code as XML 1.
2454
     *                         </td>
2455
     *                         </tr>
2456
     *                         <tr valign="top">
2457
     *                         <td><b>ENT_XHTML</b></td>
2458
     *                         <td>
2459
     *                         Handle code as XHTML.
2460
     *                         </td>
2461
     *                         </tr>
2462
     *                         <tr valign="top">
2463
     *                         <td><b>ENT_HTML5</b></td>
2464
     *                         <td>
2465
     *                         Handle code as HTML 5.
2466
     *                         </td>
2467
     *                         </tr>
2468
     *                         </table>
2469
     *                         </p>
2470
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2471
     *
2472
     * @return string the decoded string
2473
     */
2474 46
    public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        // nothing to do for strings with less than 4 bytes or without any "&"
        if (
            !isset($str[3]) // examples: &; || &x;
            ||
            \strpos($str, '&') === false // no "&"
        ) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: the default used here is "ENT_QUOTES | ENT_HTML5"
        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // convmap: [start code point, end code point, offset, mask]
        //
        // INFO: the array must contain a multiple of four values,
        //       otherwise PHP 8 throws a "ValueError"
        $convmap = [0x80, 0xfffff, 0, 0xfffff];

        // decode until the string does not change anymore,
        // so that nested entities are resolved too
        do {
            $str_compare = $str;

            // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
            if (self::$SUPPORT['mbstring'] === true) {
                if ($encoding === 'UTF-8') {
                    // no encoding argument for UTF-8, same call as before
                    $str = \mb_decode_numericentity($str, $convmap);
                } else {
                    $str = \mb_decode_numericentity($str, $convmap, $encoding);
                }
            } else {
                // NOTE(review): this branch still calls "mb_convert_encoding()",
                //               presumably provided via a polyfill - TODO confirm
                $str = (string) \preg_replace_callback(
                    "/&#\d{2,6};/",
                    /**
                     * @param string[] $matches
                     *
                     * @return string
                     */
                    static function (array $matches) use ($encoding): string {
                        $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');
                        // quotes are kept as entity in this step
                        if ($returnTmp !== '"' && $returnTmp !== "'") {
                            return $returnTmp;
                        }

                        return $matches[0];
                    },
                    $str
                );
            }

            if (\strpos($str, '&') !== false) {
                if (\strpos($str, '&#') !== false) {
                    // decode also numeric & UTF16 two byte entities
                    //
                    // INFO: this adds the missing ";" to numeric entities,
                    //       so that "html_entity_decode()" can handle them
                    $str = (string) \preg_replace(
                        '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                        '$1;',
                        $str
                    );
                }

                $str = \html_entity_decode(
                    $str,
                    $flags,
                    $encoding
                );
            }
        } while ($str_compare !== $str);

        return $str;
    }
2561
2562
    /**
     * Escape a string for html via "UTF8::htmlspecialchars()", both quote
     * types are converted and invalid code unit sequences are substituted.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }
2578
2579
    /**
     * Remove empty html-tag.
     *
     * e.g.: <tag></tag>
     *
     * INFO: a tag which only contains whitespace is also removed,
     *       and the input is returned unchanged if the regex fails
     *
     * @param string $str
     *
     * @return string
     */
    public static function html_stripe_empty_tags(string $str): string
    {
        $result = \preg_replace(
            "/<[^\/>]*>(([\s]?)*|)<\/[^>]*>/u",
            '',
            $str
        );

        // "preg_replace()" returns null on error (e.g. invalid UTF-8 + "u" modifier),
        // so we keep the input instead of returning an empty string
        if ($result === null) {
            return $str;
        }

        return $result;
    }
2596
2597
    /**
2598
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2599
     *
2600
     * @see http://php.net/manual/en/function.htmlentities.php
2601
     *
2602
     * @param string $str           <p>
2603
     *                              The input string.
2604
     *                              </p>
2605
     * @param int    $flags         [optional] <p>
2606
     *                              A bitmask of one or more of the following flags, which specify how to handle
2607
     *                              quotes, invalid code unit sequences and the used document type. The default is
2608
     *                              ENT_COMPAT | ENT_HTML401.
2609
     *                              <table>
2610
     *                              Available <i>flags</i> constants
2611
     *                              <tr valign="top">
2612
     *                              <td>Constant Name</td>
2613
     *                              <td>Description</td>
2614
     *                              </tr>
2615
     *                              <tr valign="top">
2616
     *                              <td><b>ENT_COMPAT</b></td>
2617
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2618
     *                              </tr>
2619
     *                              <tr valign="top">
2620
     *                              <td><b>ENT_QUOTES</b></td>
2621
     *                              <td>Will convert both double and single quotes.</td>
2622
     *                              </tr>
2623
     *                              <tr valign="top">
2624
     *                              <td><b>ENT_NOQUOTES</b></td>
2625
     *                              <td>Will leave both double and single quotes unconverted.</td>
2626
     *                              </tr>
2627
     *                              <tr valign="top">
2628
     *                              <td><b>ENT_IGNORE</b></td>
2629
     *                              <td>
2630
     *                              Silently discard invalid code unit sequences instead of returning
2631
     *                              an empty string. Using this flag is discouraged as it
2632
     *                              may have security implications.
2633
     *                              </td>
2634
     *                              </tr>
2635
     *                              <tr valign="top">
2636
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2637
     *                              <td>
2638
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2639
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
2640
     *                              string.
2641
     *                              </td>
2642
     *                              </tr>
2643
     *                              <tr valign="top">
2644
     *                              <td><b>ENT_DISALLOWED</b></td>
2645
     *                              <td>
2646
     *                              Replace invalid code points for the given document type with a
2647
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2648
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2649
     *                              instance, to ensure the well-formedness of XML documents with
2650
     *                              embedded external content.
2651
     *                              </td>
2652
     *                              </tr>
2653
     *                              <tr valign="top">
2654
     *                              <td><b>ENT_HTML401</b></td>
2655
     *                              <td>
2656
     *                              Handle code as HTML 4.01.
2657
     *                              </td>
2658
     *                              </tr>
2659
     *                              <tr valign="top">
2660
     *                              <td><b>ENT_XML1</b></td>
2661
     *                              <td>
2662
     *                              Handle code as XML 1.
2663
     *                              </td>
2664
     *                              </tr>
2665
     *                              <tr valign="top">
2666
     *                              <td><b>ENT_XHTML</b></td>
2667
     *                              <td>
2668
     *                              Handle code as XHTML.
2669
     *                              </td>
2670
     *                              </tr>
2671
     *                              <tr valign="top">
2672
     *                              <td><b>ENT_HTML5</b></td>
2673
     *                              <td>
2674
     *                              Handle code as HTML 5.
2675
     *                              </td>
2676
     *                              </tr>
2677
     *                              </table>
2678
     *                              </p>
2679
     * @param string $encoding      [optional] <p>
2680
     *                              Like <b>htmlspecialchars</b>,
2681
     *                              <b>htmlentities</b> takes an optional third argument
2682
     *                              <i>encoding</i> which defines encoding used in
2683
     *                              conversion.
2684
     *                              Although this argument is technically optional, you are highly
2685
     *                              encouraged to specify the correct value for your code.
2686
     *                              </p>
2687
     * @param bool   $double_encode [optional] <p>
2688
     *                              When <i>double_encode</i> is turned off PHP will not
2689
     *                              encode existing html entities. The default is to convert everything.
2690
     *                              </p>
2691
     *
2692
     * @return string
2693
     *                <p>
2694
     *                The encoded string.
2695
     *                <br><br>
2696
     *                If the input <i>string</i> contains an invalid code unit
2697
     *                sequence within the given <i>encoding</i> an empty string
2698
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2699
     *                <b>ENT_SUBSTITUTE</b> flags are set.
2700
     *                </p>
2701
     */
2702 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used as they are, everything else is normalized first
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        /**
         * The native "htmlentities()" does not convert a backslash into its html entity,
         * so we replace every backslash with the numeric entity on our own.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace(
            '\\',
            '&#92;',
            \htmlentities($str, $flags, $encoding, $double_encode)
        );

        // the remaining non-ASCII chars are converted into numbered entities
        return self::html_encode($encoded, true, $encoding);
    }
2726
2727
    /**
2728
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2729
     *
2730
     * INFO: Take a look at "UTF8::htmlentities()"
2731
     *
2732
     * @see http://php.net/manual/en/function.htmlspecialchars.php
2733
     *
2734
     * @param string $str           <p>
2735
     *                              The string being converted.
2736
     *                              </p>
2737
     * @param int    $flags         [optional] <p>
2738
     *                              A bitmask of one or more of the following flags, which specify how to handle
2739
     *                              quotes, invalid code unit sequences and the used document type. The default is
2740
     *                              ENT_COMPAT | ENT_HTML401.
2741
     *                              <table>
2742
     *                              Available <i>flags</i> constants
2743
     *                              <tr valign="top">
2744
     *                              <td>Constant Name</td>
2745
     *                              <td>Description</td>
2746
     *                              </tr>
2747
     *                              <tr valign="top">
2748
     *                              <td><b>ENT_COMPAT</b></td>
2749
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2750
     *                              </tr>
2751
     *                              <tr valign="top">
2752
     *                              <td><b>ENT_QUOTES</b></td>
2753
     *                              <td>Will convert both double and single quotes.</td>
2754
     *                              </tr>
2755
     *                              <tr valign="top">
2756
     *                              <td><b>ENT_NOQUOTES</b></td>
2757
     *                              <td>Will leave both double and single quotes unconverted.</td>
2758
     *                              </tr>
2759
     *                              <tr valign="top">
2760
     *                              <td><b>ENT_IGNORE</b></td>
2761
     *                              <td>
2762
     *                              Silently discard invalid code unit sequences instead of returning
2763
     *                              an empty string. Using this flag is discouraged as it
2764
     *                              may have security implications.
2765
     *                              </td>
2766
     *                              </tr>
2767
     *                              <tr valign="top">
2768
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2769
     *                              <td>
2770
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2771
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
2772
     *                              string.
2773
     *                              </td>
2774
     *                              </tr>
2775
     *                              <tr valign="top">
2776
     *                              <td><b>ENT_DISALLOWED</b></td>
2777
     *                              <td>
2778
     *                              Replace invalid code points for the given document type with a
2779
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2780
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2781
     *                              instance, to ensure the well-formedness of XML documents with
2782
     *                              embedded external content.
2783
     *                              </td>
2784
     *                              </tr>
2785
     *                              <tr valign="top">
2786
     *                              <td><b>ENT_HTML401</b></td>
2787
     *                              <td>
2788
     *                              Handle code as HTML 4.01.
2789
     *                              </td>
2790
     *                              </tr>
2791
     *                              <tr valign="top">
2792
     *                              <td><b>ENT_XML1</b></td>
2793
     *                              <td>
2794
     *                              Handle code as XML 1.
2795
     *                              </td>
2796
     *                              </tr>
2797
     *                              <tr valign="top">
2798
     *                              <td><b>ENT_XHTML</b></td>
2799
     *                              <td>
2800
     *                              Handle code as XHTML.
2801
     *                              </td>
2802
     *                              </tr>
2803
     *                              <tr valign="top">
2804
     *                              <td><b>ENT_HTML5</b></td>
2805
     *                              <td>
2806
     *                              Handle code as HTML 5.
2807
     *                              </td>
2808
     *                              </tr>
2809
     *                              </table>
2810
     *                              </p>
2811
     * @param string $encoding      [optional] <p>
2812
     *                              Defines encoding used in conversion.
2813
     *                              </p>
2814
     *                              <p>
2815
     *                              For the purposes of this function, the encodings
2816
     *                              ISO-8859-1, ISO-8859-15,
2817
     *                              UTF-8, cp866,
2818
     *                              cp1251, cp1252, and
2819
     *                              KOI8-R are effectively equivalent, provided the
2820
     *                              <i>string</i> itself is valid for the encoding, as
2821
     *                              the characters affected by <b>htmlspecialchars</b> occupy
2822
     *                              the same positions in all of these encodings.
2823
     *                              </p>
2824
     * @param bool   $double_encode [optional] <p>
2825
     *                              When <i>double_encode</i> is turned off PHP will not
2826
     *                              encode existing html entities, the default is to convert everything.
2827
     *                              </p>
2828
     *
2829
     * @return string the converted string.
2830
     *                </p>
2831
     *                <p>
2832
     *                If the input <i>string</i> contains an invalid code unit
2833
     *                sequence within the given <i>encoding</i> an empty string
2834
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2835
     *                <b>ENT_SUBSTITUTE</b> flags are set
2836
     */
2837 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used as they are, everything else is normalized first
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $converted;
    }
2849
2850
    /**
     * Checks whether the "iconv" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function iconv_loaded(): bool
    {
        $isLoaded = \extension_loaded('iconv');

        return $isLoaded;
    }
2860
2861
    /**
     * Alias for "UTF8::decimal_to_chr()": simply forwards the value and
     * hands back the result.
     *
     * @see UTF8::decimal_to_chr()
     *
     * @param mixed $int <p>The value for "UTF8::decimal_to_chr()".</p>
     *
     * @return string the result of "UTF8::decimal_to_chr()"
     */
    public static function int_to_chr($int): string
    {
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
2874
2875
    /**
     * Converts Integer to hexadecimal U+xxxx code point representation.
     *
     * INFO: opposite to UTF8::hex_to_int()
     *
     * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
     * @param string $pfix [optional] <p>The prefix that is put in front of the hex-digits.</p>
     *
     * @return string the prefix followed by the hex-digits (filled with zeros up to four digits)
     */
    public static function int_to_hex(int $int, string $pfix = 'U+'): string
    {
        $digits = \dechex($int);

        // fill short values with leading zeros, e.g.: "f1" => "00f1"
        if (\strlen($digits) < 4) {
            $digits = \substr('0000' . $digits, -4);
        }

        return $pfix . $digits;
    }
2893
2894
    /**
     * Checks whether the "IntlChar" class exists.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function intlChar_loaded(): bool
    {
        $isAvailable = \class_exists('IntlChar');

        return $isAvailable;
    }
2904
2905
    /**
     * Checks whether the "intl" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function intl_loaded(): bool
    {
        $isLoaded = \extension_loaded('intl');

        return $isLoaded;
    }
2915
2916
    /**
     * Alias for "UTF8::is_ascii()": simply forwards the string and
     * hands back the result.
     *
     * @see        UTF8::is_ascii()
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool the result of "UTF8::is_ascii()"
     *
     * @deprecated <p>use "UTF8::is_ascii()"</p>
     */
    public static function isAscii(string $str): bool
    {
        $isAscii = self::is_ascii($str);

        return $isAscii;
    }
2931
2932
    /**
     * Alias for "UTF8::is_base64()": simply forwards the value and
     * hands back the result.
     *
     * @see        UTF8::is_base64()
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool the result of "UTF8::is_base64()"
     *
     * @deprecated <p>use "UTF8::is_base64()"</p>
     */
    public static function isBase64($str): bool
    {
        $isBase64 = self::is_base64($str);

        return $isBase64;
    }
2947
2948
    /**
     * Alias for "UTF8::is_binary()": simply forwards both arguments and
     * hands back the result.
     *
     * @see        UTF8::is_binary()
     *
     * @param mixed $str    <p>The value for "UTF8::is_binary()".</p>
     * @param bool  $strict [optional] <p>The strict-flag for "UTF8::is_binary()".</p>
     *
     * @return bool the result of "UTF8::is_binary()"
     *
     * @deprecated <p>use "UTF8::is_binary()"</p>
     */
    public static function isBinary($str, $strict = false): bool
    {
        $isBinary = self::is_binary($str, $strict);

        return $isBinary;
    }
2964
2965
    /**
     * Alias for "UTF8::is_bom()": simply forwards the string and
     * hands back the result.
     *
     * @see        UTF8::is_bom()
     *
     * @param string $utf8_chr <p>The input string.</p>
     *
     * @return bool the result of "UTF8::is_bom()"
     *
     * @deprecated <p>use "UTF8::is_bom()"</p>
     */
    public static function isBom(string $utf8_chr): bool
    {
        $isBom = self::is_bom($utf8_chr);

        return $isBom;
    }
2980
2981
    /**
     * Deprecated camelCase alias: forwards to "UTF8::is_html()".
     *
     * @see        UTF8::is_html()
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              The unchanged result of "UTF8::is_html()"
     *
     * @deprecated <p>use "UTF8::is_html()"</p>
     */
    public static function isHtml(string $str): bool
    {
        $result = self::is_html($str);

        return $result;
    }
2996
2997
    /**
     * Deprecated camelCase alias: forwards to "UTF8::is_json()".
     *
     * The optional second argument of "UTF8::is_json()" is not exposed here,
     * so its default is always used.
     *
     * @see        UTF8::is_json()
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              The unchanged result of "UTF8::is_json()"
     *
     * @deprecated <p>use "UTF8::is_json()"</p>
     */
    public static function isJson(string $str): bool
    {
        $result = self::is_json($str);

        return $result;
    }
3012
3013
    /**
     * Deprecated camelCase alias: forwards to "UTF8::is_utf16()".
     *
     * The optional second argument of "UTF8::is_utf16()" is not exposed here,
     * so its default is always used.
     *
     * @see        UTF8::is_utf16()
     *
     * @param mixed $str <p>The input string.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     *
     * @deprecated <p>use "UTF8::is_utf16()"</p>
     */
    public static function isUtf16($str)
    {
        $result = self::is_utf16($str);

        return $result;
    }
3031
3032
    /**
     * Deprecated camelCase alias: forwards to "UTF8::is_utf32()".
     *
     * The optional second argument of "UTF8::is_utf32()" is not exposed here,
     * so its default is always used.
     *
     * @see        UTF8::is_utf32()
     *
     * @param mixed $str <p>The input string.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     *
     * @deprecated <p>use "UTF8::is_utf32()"</p>
     */
    public static function isUtf32($str)
    {
        $result = self::is_utf32($str);

        return $result;
    }
3050
3051
    /**
     * Deprecated camelCase alias: forwards to "UTF8::is_utf8()".
     *
     * @see        UTF8::is_utf8()
     *
     * @param string $str    <p>The string to be checked.</p>
     * @param bool   $strict <p>Passed through unchanged as the "$strict" argument.</p>
     *
     * @return bool
     *              The unchanged result of "UTF8::is_utf8()"
     *
     * @deprecated <p>use "UTF8::is_utf8()"</p>
     */
    public static function isUtf8($str, $strict = false): bool
    {
        $result = self::is_utf8($str, $strict);

        return $result;
    }
3067
3068
    /**
     * Tells whether the string consists solely of alphabetic chars.
     *
     * The whole string is matched against the POSIX class "[[:alpha:]]";
     * because the pattern uses "*", an empty string is accepted by the pattern as well.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only alphabetic chars
     */
    public static function is_alpha(string $str): bool
    {
        $pattern = '^[[:alpha:]]*$';

        // without mbstring we fall back to the class' own pattern matcher
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3085
3086
    /**
     * Tells whether the string consists solely of alphabetic and numeric chars.
     *
     * The whole string is matched against the POSIX class "[[:alnum:]]";
     * because the pattern uses "*", an empty string is accepted by the pattern as well.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only alphanumeric chars
     */
    public static function is_alphanumeric(string $str): bool
    {
        $pattern = '^[[:alnum:]]*$';

        // without mbstring we fall back to the class' own pattern matcher
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3103
3104
    /**
     * Checks if a string is 7 bit ASCII.
     *
     * Accepted bytes are exactly: 0x09, 0x10, 0x13, 0x0A, 0x0D and the range
     * 0x20-0x7E. Any other byte (including e.g. 0x00 or 0x7F) makes the check fail.
     *
     * NOTE(review): 0x10 and 0x13 look like decimal 10 / 13 (LF / CR) written as
     * hex escapes - confirm whether these two control bytes are accepted on purpose.
     *
     * @param string $str <p>The string to check.</p>
     *
     * @return bool
     *              <strong>true</strong> if it is ASCII (an empty string counts as ASCII)<br>
     *              <strong>false</strong> otherwise
     */
    public static function is_ascii(string $str): bool
    {
        // the pattern looks for the first byte that is NOT in the accepted set
        $hasForbiddenByte = $str !== ''
                            &&
                            \preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

        return !$hasForbiddenByte;
    }
3121
3122
    /**
     * Tells whether the input is a base64 encoded string.
     *
     * The input must be a string that decodes in strict mode and whose decoded
     * value re-encodes to exactly the same string.
     *
     * @param mixed|string $str                <p>The input string.</p>
     * @param bool         $emptyStringIsValid [optional] <p>Is an empty string valid base64 or not?</p>
     *
     * @return bool whether or not $str is base64 encoded
     */
    public static function is_base64($str, $emptyStringIsValid = false): bool
    {
        if ($str === '' && $emptyStringIsValid === false) {
            return false;
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (!\is_string($str)) {
            return false;
        }

        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // round trip: rejects input that decodes but is not in canonical form
        return \base64_encode($decoded) === $str;
    }
3147
3148
    /**
     * Heuristic check whether the input looks like binary data.
     *
     * The input is cast to string and then tested in this order:
     * 1. an empty string is never binary,
     * 2. a string made only of the chars "0" and "1" is reported as binary,
     * 3. "UTF8::get_file_type()" reporting the type "binary" is reported as binary,
     * 4. more than 25% NUL bytes is reported as binary,
     * 5. only with $strict: finfo reporting the mime encoding "binary".
     *
     * @param mixed $input  <p>The input to check.</p>
     * @param bool  $strict <p>Also ask ext-fileinfo when the cheaper checks did not match.</p>
     *
     * @throws \RuntimeException if $strict is used, the cheaper checks did not match
     *                           and ext-fileinfo is flagged as not available
     *
     * @return bool
     */
    public static function is_binary($input, bool $strict = false): bool
    {
        $input = (string) $input;
        if ($input === '') {
            return false;
        }

        if (\preg_match('~^[01]+$~', $input)) {
            return true;
        }

        $fileType = self::get_file_type($input);
        if ($fileType['type'] === 'binary') {
            return true;
        }

        $byteCount = \strlen($input);
        $nullByteCount = \substr_count($input, "\x0", 0, $byteCount);
        if (($nullByteCount / $byteCount) > 0.25) {
            return true;
        }

        if ($strict !== true) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $detectedEncoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

        return $detectedEncoding === 'binary';
    }
3192
3193
    /**
     * Check if the file is binary.
     *
     * Only the first 512 bytes of the file are read and handed to
     * "UTF8::is_binary()" in strict mode. If the file cannot be opened, or
     * nothing could be read, the result is <strong>false</strong>.
     *
     * @param string $file <p>Path (or stream url) that is passed to fopen().</p>
     *
     * @return bool
     */
    public static function is_binary_file($file): bool
    {
        $head = '';

        $handle = \fopen($file, 'rb');
        if (\is_resource($handle)) {
            $head = \fread($handle, 512);
            \fclose($handle);
        }

        if ($head === '') {
            return false;
        }

        $looksBinary = self::is_binary($head, true);

        return $looksBinary;
    }
3217
3218
    /**
     * Tells whether the string consists solely of whitespace chars.
     *
     * The whole string is matched against the POSIX class "[[:space:]]";
     * because the pattern uses "*", an empty string is accepted by the pattern as well.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only whitespace characters
     */
    public static function is_blank(string $str): bool
    {
        $pattern = '^[[:space:]]*$';

        // without mbstring we fall back to the class' own pattern matcher
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3235
3236
    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * The input is compared strictly (type and value) against every key of the
     * internal BOM table, so it must be exactly the BOM and nothing else.
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise
     */
    public static function is_bom($str): bool
    {
        // Only the keys (the BOM byte strings) matter here. Iterating them by value
        // avoids taking a reference into the shared static table.
        return \in_array($str, \array_keys(self::$BOM), true);
    }
3257
3258
    /**
     * Determine whether the given value is considered to be empty.
     *
     * This is a thin wrapper around PHP's empty(): the result is true for every
     * value that loosely equals FALSE (e.g. "", "0", 0, null, false, []).
     *
     * @param mixed $str <p>The value to check.</p>
     *
     * @return bool whether or not $str is empty()
     */
    public static function is_empty($str): bool
    {
        $isEmpty = empty($str);

        return $isEmpty;
    }
3272
3273
    /**
     * Tells whether the string consists solely of hexadecimal chars.
     *
     * The whole string is matched against the POSIX class "[[:xdigit:]]";
     * because the pattern uses "*", an empty string is accepted by the pattern as well.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only hexadecimal chars
     */
    public static function is_hexadecimal(string $str): bool
    {
        $pattern = '^[[:xdigit:]]*$';

        // without mbstring we fall back to the class' own pattern matcher
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3290
3291
    /**
     * Check if the string contains at least one html-tag.
     *
     * A tag is an opening, closing or self-closing element, optionally with
     * attributes (quoted or unquoted values). The pattern runs in UTF-8 mode,
     * so a failing match (for any reason) yields <strong>false</strong>.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if a tag was found, <strong>false</strong> otherwise
     *              (an empty string never contains a tag)
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        $found = \preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/u", $str);

        return $found === 1;
    }
3311
3312
    /**
     * Try to check if "$str" is a json-string.
     *
     * The string is decoded via "UTF8::json_decode()". A decoded NULL is only
     * tolerated when the upper-cased input is "NULL"; by default the decoded
     * value must additionally be an array or an object.
     *
     * @param string $str                              <p>The input string.</p>
     * @param bool   $onlyArrayOrObjectResultsAreValid [optional] <p>Only array and objects are valid json results.</p>
     *
     * @throws \RuntimeException if the input is not empty and ext-json is flagged as not available
     *
     * @return bool
     */
    public static function is_json(string $str, $onlyArrayOrObjectResultsAreValid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        $decodedToNull = $decoded === null;
        if ($decodedToNull && \strtoupper($str) !== 'NULL') {
            return false;
        }

        if ($onlyArrayOrObjectResultsAreValid === true) {
            $isContainer = \is_object($decoded) || \is_array($decoded);
            if (!$isContainer) {
                return false;
            }
        }

        // the decoder must not have reported any problem for the last call
        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3348
3349
    /**
     * Tells whether the string consists solely of lower case chars.
     *
     * The whole string is matched against the POSIX class "[[:lower:]]";
     * because the pattern uses "*", an empty string is accepted by the pattern as well.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only lower case characters
     */
    public static function is_lowercase(string $str): bool
    {
        $pattern = '^[[:lower:]]*$';

        // without mbstring we fall back to the class' own pattern matcher
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3363
3364
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The check is done by actually unserializing the string. Classes are
     * explicitly not allowed while doing so ("allowed_classes" => false), so
     * that merely testing a string never instantiates arbitrary objects.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not $str is serialized
     *              (an empty string is never treated as serialized)
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // "b:0;" is the serialized form of FALSE, which cannot be told apart
        // from the failure return value of unserialize()
        if ($str === 'b:0;') {
            return true;
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        /** @noinspection UnserializeExploitsInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3383
3384
    /**
     * Tells whether the string consists solely of upper case chars.
     *
     * The whole string is matched against the POSIX class "[[:upper:]]";
     * because the pattern uses "*", an empty string is accepted by the pattern as well.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only upper case characters
     */
    public static function is_uppercase(string $str): bool
    {
        $pattern = '^[[:upper:]]*$';

        // without mbstring we fall back to the class' own pattern matcher
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3402
3403
    /**
     * Check if the string is UTF-16.
     *
     * Both byte orders (LE first, then BE) are scored the same way: the input
     * (without BOM) is converted to UTF-8 and, if a round trip
     * UTF-8 -> UTF-16 -> UTF-8 reproduces that conversion, one point is added for
     * every distinct char of the result that a strict in_array() finds in
     * "UTF8::count_chars($str, true, false)". The byte order with the higher
     * score wins; equal scores mean "not UTF-16".
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary <p>When true, input that "UTF8::is_binary()" (strict mode)
     *                                     does not report as binary is rejected right away.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        $scores = ['UTF-16LE' => 0, 'UTF-16BE' => 0];
        foreach (\array_keys($scores) as $encoding) {
            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            // the conversion only counts if it survives a full round trip
            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            // computed lazily and shared between both byte orders
            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true, false);
            }

            // Only the keys are needed, so iterate them by value (a function
            // result is no valid target for a by-reference foreach).
            // NOTE(review): in_array() searches the *values* of $strChars while
            // $test3char is a *key* of count_chars() - confirm this is intended.
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $strChars, true) === true) {
                    ++$scores[$encoding];
                }
            }
        }

        if ($scores['UTF-16BE'] === $scores['UTF-16LE']) {
            return false;
        }

        return $scores['UTF-16LE'] > $scores['UTF-16BE'] ? 1 : 2;
    }
3480
3481
    /**
     * Check if the string is UTF-32.
     *
     * Both byte orders (LE first, then BE) are scored the same way: the input
     * (without BOM) is converted to UTF-8 and, if a round trip
     * UTF-8 -> UTF-32 -> UTF-8 reproduces that conversion, one point is added for
     * every distinct char of the result that a strict in_array() finds in
     * "UTF8::count_chars($str, true, false)". The byte order with the higher
     * score wins; equal scores mean "not UTF-32".
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary <p>When true, input that "UTF8::is_binary()" (strict mode)
     *                                     does not report as binary is rejected right away.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        $scores = ['UTF-32LE' => 0, 'UTF-32BE' => 0];
        foreach (\array_keys($scores) as $encoding) {
            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            // the conversion only counts if it survives a full round trip
            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            // computed lazily and shared between both byte orders
            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true, false);
            }

            // Only the keys are needed, so iterate them by value (a function
            // result is no valid target for a by-reference foreach).
            // NOTE(review): in_array() searches the *values* of $strChars while
            // $test3char is a *key* of count_chars() - confirm this is intended.
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $strChars, true) === true) {
                    ++$scores[$encoding];
                }
            }
        }

        if ($scores['UTF-32BE'] === $scores['UTF-32LE']) {
            return false;
        }

        return $scores['UTF-32LE'] > $scores['UTF-32BE'] ? 1 : 2;
    }
3558
3559
    /**
     * Encode a string with emoji chars into a non-emoji string.
     *
     * Every emoji from the internal emoji table is replaced by its key from
     * either the normal or the reversible key table.
     *
     * @param string $str                        <p>The input string</p>
     * @param bool   $useReversibleStringMapping [optional] <p>
     *                                           when <b>TRUE</b>, we use a reversible string mapping
     *                                           between "emoji_encode" and "emoji_decode"</p>
     *
     * @return string
     */
    public static function emoji_encode(string $str, bool $useReversibleStringMapping = false): string
    {
        // make sure the emoji tables are initialized before they are read
        self::initEmojiData();

        $replacements = $useReversibleStringMapping === true
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            (array) $replacements,
            $str
        );
    }
3587
3588
    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * Every key from either the normal or the reversible key table is replaced
     * by its emoji from the internal emoji table.
     *
     * @param string $str                        <p>The input string.</p>
     * @param bool   $useReversibleStringMapping [optional] <p>
     *                                           When <b>TRUE</b>, we use a reversible string mapping
     *                                           between "emoji_encode" and "emoji_decode".</p>
     *
     * @return string
     */
    public static function emoji_decode(string $str, bool $useReversibleStringMapping = false): string
    {
        // make sure the emoji tables are initialized before they are read
        self::initEmojiData();

        $searches = $useReversibleStringMapping === true
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) $searches,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }
3616
3617
    /**
     * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
     *
     * For an array every element must pass the check (an empty array passes).
     * Rejected are: overlong (non-shortest) forms, 5 and 6 byte sequences,
     * surrogates (U+D800 - U+DFFF), code points above U+10FFFF, stray
     * continuation bytes and incomplete sequences inside the string.
     *
     * @see    http://hsivonen.iki.fi/php-utf8/
     *
     * @param string|string[] $str    <p>The string to be checked.</p>
     * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str) === true) {
            // by-value iteration: the elements are only read, never modified
            foreach ($str as $v) {
                if (self::is_utf8($v, $strict) === false) {
                    return false;
                }
            }

            return true;
        }

        // the byte-wise loop below indexes into the value, so it has to be a string
        $str = (string) $str;

        if ($str === '') {
            return true;
        }

        if ($strict === true) {
            $isBinary = self::is_binary($str, true);

            if ($isBinary && self::is_utf16($str, false) !== false) {
                return false;
            }

            if ($isBinary && self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        // NOTE(review): this shortcut is taken when pcre_utf8_support() is NOT true,
        // although it relies on the /u modifier - confirm the condition is not inverted.
        if (self::pcre_utf8_support() !== true) {

            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^.{1}/us', $str) === 1;
        }

        $mState = 0; // cached expected number of octets after the current octet
        // until the beginning of the next UTF8 character sequence
        $mUcs4 = 0; // cached Unicode character
        $mBytes = 1; // cached expected number of octets in the current sequence

        // lazy-load the byte => ordinal lookup table
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $len = \strlen($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $len; ++$i) {
            $in = self::$ORD[$str[$i]];
            if ($mState === 0) {
                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if ((0x80 & $in) === 0) {
                    // US-ASCII, pass straight through.
                    $mBytes = 1;
                } elseif ((0xE0 & $in) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;
                } elseif ((0xF0 & $in) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;
                } elseif ((0xF8 & $in) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;
                } elseif ((0xFC & $in) === 0xF8) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;
                } elseif ((0xFE & $in) === 0xFC) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;
                } else {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }
            } elseif ((0xC0 & $in) === 0x80) {

                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence

                // Legal continuation.
                $shift = ($mState - 1) * 6;
                $tmp = $in;
                $tmp = ($tmp & 0x0000003F) << $shift;
                $mUcs4 |= $tmp;
                // Prefix: End of the multi-octet sequence. mUcs4 now contains the final
                // Unicode code point to be output.
                if (--$mState === 0) {
                    // Check for illegal sequences and code points.
                    //
                    // From Unicode 3.1, non-shortest form is illegal
                    if (
                        ($mBytes === 2 && $mUcs4 < 0x0080)
                        ||
                        ($mBytes === 3 && $mUcs4 < 0x0800)
                        ||
                        ($mBytes === 4 && $mUcs4 < 0x10000)
                        ||
                        ($mBytes > 4)
                        ||
                        // From Unicode 3.2, surrogate characters are illegal.
                        (($mUcs4 & 0xFFFFF800) === 0xD800)
                        ||
                        // Code points outside the Unicode range are illegal.
                        ($mUcs4 > 0x10FFFF)
                    ) {
                        return false;
                    }
                    // initialize UTF8 cache
                    $mState = 0;
                    $mUcs4 = 0;
                    $mBytes = 1;
                }
            } else {
                // ((0xC0 & (*in) != 0x80) && (mState != 0))
                // Incomplete multi-octet sequence.
                return false;
            }
        }

        return true;
    }
3772
3773
    /**
     * Decodes a JSON string, after passing it through UTF8::filter().
     *
     * Thin wrapper around the native json_decode(): every argument except
     * $json is forwarded unchanged.
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>The <i>json</i> string being decoded. It is run through UTF8::filter() first.</p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>User specified recursion depth.</p>
     * @param int    $options [optional] <p>Bitmask of JSON decode options, handed to json_decode() as-is.</p>
     *
     * @throws \RuntimeException if the internal support table reports that ext-json is not available
     *
     * @return mixed
     *               Whatever the native json_decode() returns for the filtered input; according to the
     *               PHP manual this is <b>NULL</b> if the <i>json</i> cannot be decoded or if the
     *               encoded data is deeper than the recursion limit.
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        $filteredJson = self::filter($json);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $decoded = \json_decode($filteredJson, $assoc, $depth, $options);

        return $decoded;
    }
3823
3824
    /**
     * Returns the JSON representation of a value, after passing it through UTF8::filter().
     *
     * Thin wrapper around the native json_encode(): $options and $depth are
     * forwarded unchanged.
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>The <i>value</i> being encoded. It is run through UTF8::filter() first.</p>
     * @param int   $options [optional] <p>
     *                       Bitmask of JSON encode options (<b>JSON_HEX_QUOT</b>, <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_UNICODE</b>, ...), handed to json_encode() as-is.
     *                       </p>
     * @param int   $depth   [optional] <p>Set the maximum depth. Must be greater than zero.</p>
     *
     * @throws \RuntimeException if the internal support table reports that ext-json is not available
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filteredValue = self::filter($value);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $encoded = \json_encode($filteredValue, $options, $depth);

        return $encoded;
    }
3873
3874
    /**
     * Checks whether JSON support is available on the server.
     *
     * The check is done by looking for the native json_decode() function.
     *
     * @return bool
     *              <strong>true</strong> if json_decode() exists, <strong>false</strong> otherwise
     */
    public static function json_loaded(): bool
    {
        $hasJsonDecode = \function_exists('json_decode');

        return $hasJsonDecode;
    }
3884
3885
    /**
     * Makes the first character of a string lowercase.
     *
     * For "UTF-8" input without language / length-preserving options the native
     * mb_* functions are used directly; every other combination is delegated to
     * UTF8::strtolower() (and, for other encodings, UTF8::substr()).
     *
     * @param string      $str                   <p>The input string</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Pass the string through UTF8::clean() first.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the lowercased first character followed by the untouched rest of the string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // any other encoding: normalize its name and go through the library helpers
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $tail = (string) self::substr($str, 1, null, $encoding);
            $head = self::strtolower(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $tryToKeepStringLength
            );

            return $head . $tail;
        }

        $tail = (string) \mb_substr($str, 1);
        $firstChar = (string) \mb_substr($str, 0, 1);

        // the plain mb_strtolower() is only used when no special handling was requested
        $canUseNativeLowercase = $lang === null && $tryToKeepStringLength === false;
        if ($canUseNativeLowercase === true) {
            return \mb_strtolower($firstChar) . $tail;
        }

        $head = self::strtolower(
            $firstChar,
            $encoding,
            false,
            $lang,
            $tryToKeepStringLength
        );

        return $head . $tail;
    }
3941
3942
    /**
     * Alias for "UTF8::lcfirst()": all arguments are forwarded unchanged.
     *
     * @see UTF8::lcfirst()
     *
     * @param string      $str                   <p>The input string</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>See UTF8::lcfirst().</p>
     * @param string|null $lang                  [optional] <p>See UTF8::lcfirst().</p>
     * @param bool        $tryToKeepStringLength [optional] <p>See UTF8::lcfirst().</p>
     *
     * @return string the result of UTF8::lcfirst()
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $lowercased = self::lcfirst(
            $str,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );

        return $lowercased;
    }
3964
3965
    /**
     * Lowercases the first character of every word in the string.
     *
     * The string is split via UTF8::str_to_words(), each non-empty part that is
     * not listed in $exceptions is passed through UTF8::lcfirst(), and the parts
     * are glued back together without a separator.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string[]    $exceptions            [optional] <p>Words that must be left untouched (exact match).</p>
     * @param string      $charlist              [optional] <p>Additional chars that belong to words and do not
     *                                           start a new word (forwarded to UTF8::str_to_words()).</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Forwarded to UTF8::lcfirst().</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // Strict comparison: a truthiness check would also swallow the valid input "0".
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        foreach ($words as &$word) {
            // nothing to lowercase in empty parts ("0" is skipped too, it has no case)
            if (!$word) {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $word = self::lcfirst($word, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
            }
        }
        // break the reference, so later use of $word can never modify the last array element
        unset($word);

        return \implode('', $words);
    }
4011
4012
    /**
     * Alias for "UTF8::lcfirst()": all arguments are forwarded unchanged.
     *
     * @see UTF8::lcfirst()
     *
     * @param string      $str                   <p>The input string</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>See UTF8::lcfirst().</p>
     * @param string|null $lang                  [optional] <p>See UTF8::lcfirst().</p>
     * @param bool        $tryToKeepStringLength [optional] <p>See UTF8::lcfirst().</p>
     *
     * @return string the result of UTF8::lcfirst()
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $lowercased = self::lcfirst(
            $str,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );

        return $lowercased;
    }
4034
4035
    /**
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
     *
     * Builds a "^[...]+" pattern and removes the match via mb_ereg_replace()
     * when mbstring is flagged as available, otherwise via UTF8::regex_replace().
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped; <b>NULL</b> or an empty string
     *                           means "strip whitespace".</p>
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Explicit null / '' checks instead of truthiness, so that a char list of "0"
        // is honoured instead of silently falling back to whitespace trimming.
        if ($chars !== null && $chars !== '') {
            $chars = \preg_quote($chars, '/');
            // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2
            $pattern = "^[{$chars}]+";
        } else {
            $pattern = '^[\s]+';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
4063
4064
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * An array argument is concatenated into one string first; the code points
     * are then obtained via UTF8::codepoints() and the largest one is converted
     * back into a character via UTF8::chr().
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @return string|null the character with the highest code point, or null when no code point was found
     */
    public static function max($arg)
    {
        $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input, false);

        // nothing to compare
        if (\count($codepoints) === 0) {
            return null;
        }

        return self::chr(\max($codepoints));
    }
4086
4087
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * The per-character sizes come from UTF8::chr_size_list().
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @return int the largest byte length in the size list, or 0 when the list is empty
     */
    public static function max_chr_width(string $str): int
    {
        $byteSizes = self::chr_size_list($str);

        return \count($byteSizes) > 0 ? (int) \max($byteSizes) : 0;
    }
4104
4105
    /**
     * Checks whether the "mbstring" extension is loaded on the server.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function mbstring_loaded(): bool
    {
        $isLoaded = \extension_loaded('mbstring');

        return $isLoaded;
    }
4115
4116
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * An array argument is concatenated into one string first; the code points
     * are then obtained via UTF8::codepoints() and the smallest one is converted
     * back into a character via UTF8::chr().
     *
     * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @return string|null the character with the lowest code point, or null when no code point was found
     */
    public static function min($arg)
    {
        $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input, false);

        // nothing to compare
        if (\count($codepoints) === 0) {
            return null;
        }

        return self::chr(\min($codepoints));
    }
4138
4139
    /**
     * Alias for "UTF8::normalize_encoding()": both arguments are forwarded unchanged.
     *
     * @see        UTF8::normalize_encoding()
     *
     * @param mixed $encoding <p>The encoding name to normalize.</p>
     * @param mixed $fallback <p>Value handed to UTF8::normalize_encoding() as its fallback.</p>
     *
     * @return mixed the result of UTF8::normalize_encoding()
     *
     * @deprecated <p>use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4155
4156
    /**
     * Normalize the encoding-"name" input.
     *
     * Resolution order: empty input => $fallback; a few hard-coded spellings;
     * the per-request static cache; exact match against the known encodings
     * list; finally an alias lookup on the upper-cased name with every
     * character except letters, digits and whitespace removed. A name that
     * matches nothing is returned upper-cased.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        // init
        $encoding = (string) $encoding;

        // empty input (this also covers the string "0")
        if (!$encoding) {
            return $fallback;
        }

        //
        // fast paths for frequently used spellings
        //

        if (\in_array($encoding, ['UTF-8', 'UTF8'], true)) {
            return 'UTF-8';
        }

        if (\in_array($encoding, ['8BIT', 'BINARY'], true)) {
            return 'CP850';
        }

        if (\in_array($encoding, ['HTML', 'HTML-ENTITIES'], true)) {
            return 'HTML-ENTITIES';
        }

        // only a fallback, for non "strict_types" usage ...
        if (\in_array($encoding, ['1', '0'], true)) {
            return $fallback;
        }

        //
        // already resolved during this request?
        //

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        // lazy-load the list of known encoding names
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // exact match => the name is already in its canonical form
        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        //
        // alias lookup on a simplified, upper-cased version of the name
        //

        $cacheKey = $encoding;
        $normalized = \strtoupper($encoding);
        $aliasKey = (string) \preg_replace('/[^a-zA-Z0-9\s]/u', '', $normalized);

        $aliases = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // unknown names are kept in their upper-cased form
        if (isset($aliases[$aliasKey])) {
            $normalized = $aliases[$aliasKey];
        }

        // remember the result under the name exactly as it was passed in
        $STATIC_NORMALIZE_ENCODING_CACHE[$cacheKey] = $normalized;

        return $normalized;
    }
4299
4300
    /**
     * Standardize line endings to unix-like ones.
     *
     * Every "\r\n" and every remaining lone "\r" is replaced by "\n".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function normalize_line_ending(string $str): string
    {
        // order matters: "\r\n" must be handled before the single "\r"
        $foreignLineEndings = ["\r\n", "\r"];
        $unixLineEnding = "\n";

        return \str_replace($foreignLineEndings, $unixLineEnding, $str);
    }
4311
4312
    /**
     * Normalize some MS Word special characters.
     *
     * Typographic quotes, angle quotes, dashes and the ellipsis (given as UTF-8
     * byte sequences) are replaced by their plain ASCII counterparts.
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @return string
     */
    public static function normalize_msword(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // UTF-8 byte sequence => ASCII replacement
        $replacements = [
            "\xc2\xab"     => '"', // « (U+00AB)
            "\xc2\xbb"     => '"', // » (U+00BB)
            "\xe2\x80\x98" => "'", // ‘ (U+2018)
            "\xe2\x80\x99" => "'", // ’ (U+2019)
            "\xe2\x80\x9a" => "'", // ‚ (U+201A)
            "\xe2\x80\x9b" => "'", // ‛ (U+201B)
            "\xe2\x80\x9c" => '"', // “ (U+201C)
            "\xe2\x80\x9d" => '"', // ” (U+201D)
            "\xe2\x80\x9e" => '"', // „ (U+201E)
            "\xe2\x80\x9f" => '"', // ‟ (U+201F)
            "\xe2\x80\xb9" => "'", // ‹ (U+2039)
            "\xe2\x80\xba" => "'", // › (U+203A)
            "\xe2\x80\x93" => '-', // – (U+2013)
            "\xe2\x80\x94" => '-', // — (U+2014)
            "\xe2\x80\xa6" => '...', // … (U+2026)
        ];

        // split the map into the two parallel lists that str_replace() expects
        $search = [];
        $replace = [];
        foreach ($replacements as $msWordChar => $asciiChar) {
            $search[] = $msWordChar;
            $replace[] = $asciiChar;
        }

        return \str_replace($search, $replace, $str);
    }
4363
4364
    /**
     * Normalize the whitespace.
     *
     * Every entry of the internal whitespace table is replaced by a plain space.
     * Unless $keepBidiUnicodeControls is set, the entries of the internal
     * bidirectional-control table are removed first. The value lists of both
     * tables are cached in static variables after their first use.
     *
     * @param string $str                     <p>The string to be normalized.</p>
     * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                        bidirectional text chars.</p>
     *
     * @return string
     */
    public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
    {
        // one whitespace list per $keepNonBreakingSpace variant (key 0 / 1)
        static $WHITESPACE_CACHE = [];
        static $BIDI_UNICODE_CONTROLS_CACHE = null;

        if ($str === '') {
            return '';
        }

        $cacheKey = (int) $keepNonBreakingSpace;

        if (!isset($WHITESPACE_CACHE[$cacheKey])) {
            $whitespaceTable = self::$WHITESPACE_TABLE;

            // drop the non-breaking space from the list of chars to be replaced
            if ($keepNonBreakingSpace === true) {
                unset($whitespaceTable['NO-BREAK SPACE']);
            }

            $WHITESPACE_CACHE[$cacheKey] = \array_values($whitespaceTable);
        }

        if ($keepBidiUnicodeControls === false) {
            if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
                $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
            }

            $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
        }

        return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
    }
4405
4406
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * Lookup order: static result cache, the internal "ord" data table,
     * IntlChar::ord() (when flagged as available) and finally a manual
     * decoding of the first (up to four) bytes.
     *
     * @param string $chr      <p>The character of which to calculate code point.<p/>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             Unicode code point of the given character,<br>
     *             0 when none could be determined (e.g. for an empty string)
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        static $CHAR_CACHE = [];

        // init
        $chr = (string) $chr;

        // these two names are used as-is, everything else gets normalized
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $cacheKey = $chr . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        //
        // lookup via the (lazy loaded) data table
        //

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            return $CHAR_CACHE[$cacheKey] = self::$ORD[$chr];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlCode = \IntlChar::ord($chr);
            if ($intlCode) {
                return $CHAR_CACHE[$cacheKey] = $intlCode;
            }
        }

        //
        // fallback via vanilla php
        //

        // 1-based list of the first (max. four) byte values
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        $leadByte = $bytes ? $bytes[1] : 0;

        if ($leadByte >= 0xF0 && isset($bytes[4])) {
            // four byte sequence
            /** @noinspection UnnecessaryCastingInspection */
            $code = (int) ((($leadByte - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        } elseif ($leadByte >= 0xE0 && isset($bytes[3])) {
            // three byte sequence
            /** @noinspection UnnecessaryCastingInspection */
            $code = (int) ((($leadByte - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        } elseif ($leadByte >= 0xC0 && isset($bytes[2])) {
            // two byte sequence
            /** @noinspection UnnecessaryCastingInspection */
            $code = (int) ((($leadByte - 0xC0) << 6) + $bytes[2] - 0x80);
        } else {
            // single byte (or nothing at all => 0)
            $code = $leadByte;
        }

        return $CHAR_CACHE[$cacheKey] = $code;
    }
4484
4485
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike the native "parse_str()", this method never creates variables in the
     *          current scope: the result is always written to the second parameter.
     *
     * Uses mb_parse_str() when mbstring is flagged as available, the native
     * parse_str() otherwise.
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str       <p>The input string.</p>
     * @param array  $result    <p>The result will be returned into this reference parameter.</p>
     * @param bool   $cleanUtf8 [optional] <p>Pass the string through UTF8::clean() first.</p>
     *
     * @return bool
     *              Will return <strong>false</strong> if php can't parse the string or if $result
     *              ends up as an empty array
     */
    public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
    {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // without mbstring: the native function reports nothing, only $result can be inspected
        if (self::$SUPPORT['mbstring'] !== true) {
            /** @noinspection PhpVoidFunctionResultUsedInspection */
            \parse_str($str, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($str, $result);

        return $parsed !== false && $result !== [];
    }
4517
4518
    /**
     * Checks if the "u" modifier, which enables Unicode support in PCRE, is available.
     *
     * The check runs an empty pattern with the "u" modifier against an empty
     * string (errors silenced) and converts the result to a boolean.
     *
     * @return bool
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     */
    public static function pcre_utf8_support(): bool
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $matchResult = @\preg_match('//u', '');

        return (bool) $matchResult;
    }
4530
4531
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * Each boundary is resolved to a code point as follows: a string of decimal
     * digits is taken as the code point itself, a string of hexadecimal digits
     * is converted via UTF8::hex_to_int(), anything else is treated as a
     * character and passed to UTF8::ord(). NOTE: as a consequence a single
     * letter "a"-"f" is read as a hexadecimal number, not as a character.
     *
     * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     *
     * @throws \RuntimeException if the internal support table reports that ext-ctype is not available
     *
     * @return string[] the characters from start to end, or an empty array if a boundary is
     *                  empty or resolves to the code point 0
     */
    public static function range($var1, $var2): array
    {
        if (!$var1 || !$var2) {
            return [];
        }

        if (self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // Resolves one boundary to its code point. The value is cast to string before
        // both ctype checks: non-string arguments for ctype functions are deprecated
        // since PHP 8.1 and small integers would be interpreted as ASCII values.
        $toCodePoint = static function ($boundary): int {
            $boundaryAsString = (string) $boundary;

            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_digit($boundaryAsString)) {
                return (int) $boundary;
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_xdigit($boundaryAsString)) {
                return (int) self::hex_to_int($boundaryAsString);
            }

            return self::ord($boundary);
        };

        $start = $toCodePoint($var1);
        if (!$start) {
            return [];
        }

        $end = $toCodePoint($var2);
        if (!$end) {
            return [];
        }

        return \array_map(
            static function (int $i): string {
                return (string) self::chr($i);
            },
            \range($start, $end)
        );
    }
4582
4583
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @return string
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // fast path: without "&", "%", "+" or "\u" the decoding loop is skipped
        // and only "UTF8::fix_simple_utf8()" is applied
        $mayBeEncoded = \strpos($str, '&') !== false
                        ||
                        \strpos($str, '%') !== false
                        ||
                        \strpos($str, '+') !== false
                        ||
                        \strpos($str, '\u') !== false;
        if ($mayBeEncoded === false) {
            return self::fix_simple_utf8($str);
        }

        $decoded = self::urldecode_unicode_helper($str);

        // decode once, then (only for $multi_decode) repeat until a pass changes nothing
        while (true) {
            $previous = $decoded;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $decoded = self::fix_simple_utf8(
                \rawurldecode(
                    self::html_entity_decode(
                        self::to_utf8($decoded),
                        \ENT_QUOTES | \ENT_HTML5
                    )
                )
            );

            if ($multi_decode !== true || $previous === $decoded) {
                break;
            }
        }

        return $decoded;
    }
4640
4641
    /**
     * Replaces all matches of the regular expression $pattern in $str by $replacement.
     *
     * The "u" modifier is always added to the final pattern.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions (pattern modifiers) to be used.</p>
     * @param string $delimiter   [optional] <p>Delimiter of the regex. Default: '/'</p>
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // the option string "msr" is mapped to "ms"
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback for an empty (falsy) delimiter
        $boundary = $delimiter ?: '/';

        $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;
        $replaced = \preg_replace($regex, $replacement, $str);

        // "preg_replace()" returns null on error, the cast turns that into an empty string
        return (string) $replaced;
    }
4674
4675
    /**
     * Alias of "UTF8::remove_bom()", only kept for backwards compatibility.
     *
     * @see        UTF8::remove_bom()
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the result of "UTF8::remove_bom()"
     *
     * @deprecated <p>use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        // pure delegation, no logic of its own
        $withoutBom = self::remove_bom($str);

        return $withoutBom;
    }
4690
4691
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of the internal BOM table (BOM bytes => byte length) is
     * checked in turn against the start of the string, so several stacked BOMs
     * are removed as long as they appear in table order.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string without UTF-BOM
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        foreach (self::$BOM as $bomString => $bomByteLength) {
            $bomString = (string) $bomString;

            // compare only the leading bytes instead of searching the whole
            // string for the BOM and then checking for position 0
            if (\strncmp($str, $bomString, \strlen($bomString)) !== 0) {
                continue;
            }

            $strTmp = \substr($str, (int) $bomByteLength);
            // older PHP versions return false if nothing is left after the BOM
            if ($strTmp === false) {
                return '';
            }

            $str = (string) $strTmp;
        }

        return $str;
    }
4720
4721
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Directly repeated occurrences of each search string are collapsed into a
     * single occurrence, e.g. "a  b" => "a b" for the default $what.
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @return string the result string with removed duplicates
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what) === true) {
            $what = [$what];
        }

        // anything that is neither a string nor an array is ignored
        if (\is_array($what) === false) {
            return $str;
        }

        /** @noinspection ForeachSourceInspection */
        foreach ($what as $item) {
            // an empty search string cannot be duplicated
            if ($item === '') {
                continue;
            }

            // Replace with the captured group instead of the raw $item: used as
            // a replacement string, $item would be scanned for back-references
            // like "$0" or "\0" and the duplicates would survive.
            $str = (string) \preg_replace('/(' . \preg_quote($item, '/') . ')+/u', '$1', $str);
        }

        return $str;
    }
4744
4745
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $allowableTags [optional] <p>Tags which should not be stripped, e.g. "&lt;b&gt;&lt;i&gt;".
     *                              Default: '' (every tag is stripped)
     *                              </p>
     *
     * @return string
     */
    public static function remove_html(string $str, string $allowableTags = ''): string
    {
        // thin wrapper, the whole work is done by the native function
        $stripped = \strip_tags($str, $allowableTags);

        return $stripped;
    }
4759
4760
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * A Windows line ending ("\r\n") counts as one break, so it is replaced
     * by a single $replacement. Every variant of the "br" tag ("<br>", "<br/>",
     * "<BR />", "<br class='x'>", ...) is matched, other tags are left untouched.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @return string
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // - "\r\n" comes first, so a CRLF is consumed as one unit
        // - "\b" keeps tags that merely start with "br" (e.g. "<broken>") intact
        // - "[^>]*" runs up to the first ">" and may span line breaks
        return (string) \preg_replace('#\r\n|\r|\n|<br\b[^>]*>#i', $replacement, $str);
    }
4772
4773
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * @param string $str         <p>The input string.</p>
     * @param bool   $url_encoded [optional] <p>Also remove the url encoded form (e.g. "%00") of the characters.</p>
     * @param string $replacement [optional] <p>The replacement for every removed sequence.</p>
     *
     * @return string
     */
    public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
    {
        // every control character except newline (dec 10),
        // carriage return (dec 13) and horizontal tab (dec 09)
        $rawControlChars = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

        $patterns = $url_encoded
            ? [
                '/%0[0-8bcefBCEF]/', // url encoded 00-08, 11, 12, 14, 15
                '/%1[0-9a-fA-F]/', // url encoded 16-31
                $rawControlChars,
            ]
            : [$rawControlChars];

        // run again until a pass replaces nothing: removing one sequence
        // can join the surrounding text into a new sequence (e.g. "%0%000")
        $replacedCount = 0;
        do {
            $str = (string) \preg_replace($patterns, $replacement, $str, -1, $replacedCount);
        } while ($replacedCount !== 0);

        return $str;
    }
4806
4807
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string without the prefix $substring
     */
    public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // Nothing to do for an empty prefix or if $str does not start with it.
        // - the explicit comparison with '' keeps the prefix "0" working,
        //   which a truthiness check would treat like an empty string
        // - only the leading bytes are compared, the rest of $str is not searched
        if (
            $substring === ''
            ||
            \strncmp($str, $substring, \strlen($substring)) !== 0
        ) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
4838
4839
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string having a $str without the suffix $substring
     */
    public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // Nothing to do for an empty suffix or if $str does not end with it.
        // The explicit comparison with '' keeps the suffix "0" working,
        // which a truthiness check would treat like an empty string.
        if (
            $substring === ''
            ||
            \substr($str, -\strlen($substring)) !== $substring
        ) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
4871
4872
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $search        <p>The needle to search for.</p>
     * @param string $replacement   <p>The string to replace with.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $caseSensitive = true
    ): string {
        // the case-insensitive variant is delegated to "UTF8::str_ireplace()"
        if ($caseSensitive === false) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
4894
4895
    /**
     * Replaces all occurrences of every element of $search in $str by $replacement.
     *
     * @param string       $str           <p>The input string.</p>
     * @param array        $search        <p>The elements to search for.</p>
     * @param array|string $replacement   <p>The string to replace with.</p>
     * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $caseSensitive = true
    ): string {
        // the case-insensitive variant is delegated to "UTF8::str_ireplace()"
        if ($caseSensitive === false) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
4917
4918
    /**
     * Replace the diamond question mark (U+FFFD) and invalid-UTF8 chars with the replacement.
     *
     * @param string $str                <p>The input string</p>
     * @param string $replacementChar    <p>The replacement character. Default: '' (remove the chars)</p>
     * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars, too.</p>
     *
     * @return string
     */
    public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
    {
        if ($str === '') {
            return '';
        }

        if ($processInvalidUtf8 === true) {
            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            // "mb_substitute_character()" accepts a code point or a keyword, but
            // no arbitrary string. So invalid bytes are either dropped ("none")
            // or marked with U+FFFD, and the "str_replace()" below swaps in the
            // real replacement (which may then be any string).
            $substitute = $replacementChar === '' ? 'none' : 0xFFFD;

            $save = \mb_substitute_character();
            \mb_substitute_character($substitute);

            try {
                // the polyfill maybe return false, so cast to string
                $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            } finally {
                // always restore the global setting, even if the conversion throws
                \mb_substitute_character($save);
            }
        }

        // "\xEF\xBF\xBD" is the UTF-8 byte sequence of U+FFFD
        return \str_replace("\xEF\xBF\xBD", $replacementChar, $str);
    }
4964
4965
    /**
     * Strip whitespace or other characters from end of a UTF-8 string.
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped, whitespace is stripped
     *                           if this is null or an empty string.</p>
     *
     * @return string the string with unwanted characters stripped from the right
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // compare with null / '' explicitly: a truthiness check would ignore
        // the char list "0" and strip whitespace instead
        if ($chars !== null && $chars !== '') {
            $pattern = '[' . \preg_quote($chars, '/') . ']+$';
        } else {
            $pattern = '[\s]+$';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
4993
4994
    /**
     * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
     *
     * The output is written directly via "echo": one "key - value" line per
     * entry of the internal support table, wrapped into a "pre" tag.
     *
     * @psalm-suppress MissingReturnType
     */
    public static function showSupport()
    {
        echo '<pre>';

        foreach (self::$SUPPORT as $feature => $state) {
            $line = $feature . ' - ' . \print_r($state, true) . "\n<br>";

            echo $line;
        }

        echo '</pre>';
    }
5008
5009
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the HTML numbered entity
     */
    public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($char === '') {
            return '';
        }

        // ASCII input is returned as it is, if requested
        if ($keepAsciiChars === true && self::is_ascii($char) === true) {
            return $char;
        }

        $codePoint = self::ord($char, $encoding);

        return '&#' . $codePoint . ';';
    }
5034
5035
    /**
     * Replaces every run of $tabLength spaces in the string with one tab character.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength [optional] <p>Number of spaces which are replaced by one tab. Default: 4</p>
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tabLength = 4): string
    {
        // one tab is worth $tabLength spaces
        $spaces = \str_repeat(' ', $tabLength);

        return \str_replace($spaces, "\t", $str);
    }
5053
5054
    /**
     * Convert a string to an array of Unicode characters.
     *
     * If an array is given, every element is split on its own and the array
     * (with the same keys) is returned.
     *
     * @param int|int[]|string|string[] $str                <p>The string to split into array.</p>
     * @param int                       $length             [optional] <p>Max character length of each array
     *                                                      element.</p>
     * @param bool                      $cleanUtf8          [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool                      $tryToUseMbFunction [optional] <p>Set to false, if you don't want to use
     *                                                      "mb_substr"</p>
     *
     * @return array
     *               <p>An array containing chunks of the input, or an empty array
     *               for an empty input or a $length below 1.</p>
     */
    public static function str_split(
        $str,
        int $length = 1,
        bool $cleanUtf8 = false,
        bool $tryToUseMbFunction = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        if (\is_array($str) === true) {
            foreach ($str as &$v) {
                $v = self::str_split(
                    $v,
                    $length,
                    $cleanUtf8,
                    $tryToUseMbFunction
                );
            }
            // break the reference, so that the last element can't be overwritten by accident
            unset($v);

            return $str;
        }

        // init
        $str = (string) $str;

        if ($str === '') {
            return [];
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (
            $tryToUseMbFunction === true
            &&
            self::$SUPPORT['mbstring'] === true
        ) {
            $iMax = \mb_strlen($str);
            if ($iMax <= 127) {
                // short strings: pick the characters one by one
                $ret = [];
                for ($i = 0; $i < $iMax; ++$i) {
                    $ret[] = \mb_substr($str, $i, 1);
                }
            } else {
                // longer strings: let PCRE split the string in one go
                $retArray = [];
                \preg_match_all('/./us', $str, $retArray);
                $ret = $retArray[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $retArray = [];
            \preg_match_all('/./us', $str, $retArray);
            $ret = $retArray[0] ?? [];
        } else {

            // fallback: decode the UTF-8 byte sequences by hand,
            // bytes which don't form a valid sequence are skipped

            $ret = [];
            $len = \strlen($str);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($str[$i] & "\x80") === "\x00") {
                    // 0xxxxxxx: single byte (ASCII)
                    $ret[] = $str[$i];
                } elseif (
                    isset($str[$i + 1])
                    &&
                    ($str[$i] & "\xE0") === "\xC0"
                ) {
                    // 110xxxxx: lead byte of a two byte sequence
                    if (($str[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $str[$i] . $str[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($str[$i + 2])
                    &&
                    ($str[$i] & "\xF0") === "\xE0"
                ) {
                    // 1110xxxx: lead byte of a three byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($str[$i + 3])
                    &&
                    ($str[$i] & "\xF8") === "\xF0"
                ) {
                    // 11110xxx: lead byte of a four byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            $ret = \array_chunk($ret, $length);

            // the chunk is taken by value: "array_map()" can't pass its
            // arguments by reference and warns about such a callback on PHP 8
            return \array_map(
                static function (array $item): string {
                    return \implode('', $item);
                },
                $ret
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
5193
5194
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // "mb_strtoupper()" is used directly, as long as neither a language
        // nor "keep the string length" was requested
        $useMbFunction = $lang === null && $tryToKeepStringLength === false;

        // shared upper-case helper for both callbacks below
        $toUpper = static function (string $chunk, bool $clean) use ($useMbFunction, $encoding, $lang, $tryToKeepStringLength): string {
            if ($useMbFunction !== true) {
                return self::strtoupper($chunk, $encoding, $clean, $lang, $tryToKeepStringLength);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($chunk);
            }

            return \mb_strtoupper($chunk, $encoding);
        };

        $camelized = self::lcfirst(
            \trim($str),
            $encoding,
            false,
            $lang,
            $tryToKeepStringLength
        );

        // leading dashes and underscores are dropped
        $camelized = (string) \preg_replace('/^[-_]+/', '', $camelized);

        // separators are removed and the character after them is upper-cased
        $camelized = (string) \preg_replace_callback(
            '/[-_\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($toUpper): string {
                // separators at the very end have no following character
                if (!isset($match[1])) {
                    return '';
                }

                return $toUpper($match[1], false);
            },
            $camelized
        );

        // a run of digits is upper-cased together with the character after it
        return (string) \preg_replace_callback(
            '/[\d]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($toUpper, $cleanUtf8): string {
                return $toUpper($match[0], $cleanUtf8);
            },
            $camelized
        );
    }
5279
5280
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string with $str capitalized
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);

        // first pass: parts separated by spaces
        $spaceSeparated = self::str_capitalize_name_helper($collapsed, ' ');

        // second pass: parts separated by dashes
        return self::str_capitalize_name_helper($spaceSeparated, '-');
    }
5298
5299
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $caseSensitive to false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>Substring to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool whether or not $haystack contains $needle
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $caseSensitive = true
    ): bool {
        // byte-wise search for the case-sensitive check,
        // multi-byte aware search for the case-insensitive one
        $position = $caseSensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        return $position !== false;
    }
5321
5322
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $caseSensitive to false.
     *
     * An empty $haystack, an empty list of $needles or an empty needle
     * always results in false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool whether or not $haystack contains all $needles
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $caseSensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // an empty needle can't be "contained"; compare with '' explicitly,
            // so that the needle "0" is still searched for
            if ($needle === '') {
                return false;
            }

            $position = $caseSensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            // one missing needle is enough to fail, but a hit must not end the
            // loop: the remaining needles still have to be checked
            if ($position === false) {
                return false;
            }
        }

        return true;
    }
5357
5358
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $caseSensitive to false.
     *
     * Empty needles are skipped. An empty $haystack or an empty list of
     * $needles always results in false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool
     *              Whether or not $haystack contains at least one of the $needles
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $caseSensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // an empty needle can't match, but the other needles still can
            if ($needle === '') {
                continue;
            }

            $position = $caseSensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            // the first hit is enough, but a miss must not end the loop:
            // the remaining needles still have to be checked
            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
5394
5395
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * @see UTF8::str_delimit()
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        // "dasherize" is "delimit" with a fixed delimiter
        $delimiter = '-';

        return self::str_delimit($str, $delimiter, $encoding);
    }
5409
5410
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase.
     *
     * The work is done in three steps: mark every inner uppercase letter with a
     * leading "-", lowercase the string, then collapse each run of "-", "_" and
     * whitespace into one $delimiter. The regex engine ("mb_ereg_replace" or
     * "preg_replace") is chosen by the "mbstring" support flag.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $delimiter             <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                           tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                           ß</p>
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $hasMbString = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // step 1: put a dash in front of every uppercase letter that is not at a word boundary
        if ($hasMbString) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $str = (string) \mb_ereg_replace('\B(\p{Lu})', '-\1', $trimmed);
        } else {
            $str = (string) \preg_replace('/\B(\p{Lu})/u', '-\1', $trimmed);
        }

        // step 2: lowercase; the plain "mb_strtolower" is only used when no special handling was requested
        if ($encoding === 'UTF-8' && $lang === null && $tryToKeepStringLength === false) {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
        }

        // step 3: collapse runs of dashes, underscores and whitespace into the delimiter
        if ($hasMbString) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[-_\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[-_\s]+/u', $delimiter, $str);
    }
5461
5462
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * The checks run in a fixed order and the first one that succeeds wins:
     * binary input (UTF-16 / UTF-32 or nothing), ASCII, UTF-8,
     * "mb_detect_encoding()" with a fixed candidate list, and finally an
     * "iconv()" round-trip against the known encodings.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     */
    public static function str_detect_encoding($str)
    {
        $input = (string) $str;

        //
        // 1.) binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        if (self::is_binary($input, true) === true) {
            // result code 1 maps to the little-endian name, 2 to the big-endian name
            $utf16Names = [1 => 'UTF-16LE', 2 => 'UTF-16BE'];
            $utf16 = self::is_utf16($input, false);
            if (\is_int($utf16) && isset($utf16Names[$utf16])) {
                return $utf16Names[$utf16];
            }

            $utf32Names = [1 => 'UTF-32LE', 2 => 'UTF-32BE'];
            $utf32 = self::is_utf32($input, false);
            if (\is_int($utf32) && isset($utf32Names[$utf32])) {
                return $utf32Names[$utf32];
            }

            // binary, but neither "UTF-16" nor "UTF-32"
            return false;
        }

        //
        // 2.) plain ASCII
        //

        if (self::is_ascii($input) === true) {
            return 'ASCII';
        }

        //
        // 3.) UTF-8
        //

        if (self::is_utf8($input) === true) {
            return 'UTF-8';
        }

        //
        // 4.) "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            $candidates = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($input, $candidates, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) "iconv()" round-trip: an encoding matches when converting to itself leaves the input unchanged
        //

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $roundTrip = (string) @\iconv($candidate, $candidate . '//IGNORE', $input);
            if ($roundTrip === $input) {
                return $candidate;
            }
        }

        return false;
    }
5583
5584
    /**
     * Check if the string ends with the given substring.
     *
     * The comparison is byte-wise and case-sensitive: the last strlen($needle)
     * bytes of $haystack are compared with $needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        $needleBytes = \strlen($needle);
        $tail = \substr($haystack, -$needleBytes);

        return $tail === $needle;
    }
5596
5597
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * Each candidate is compared byte-wise against the tail of $str; the first
     * match ends the search. An empty list never matches.
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with $substring
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        foreach ($substrings as $candidate) {
            $tail = \substr($str, -\strlen($candidate));
            if ($tail === $candidate) {
                return true;
            }
        }

        return false;
    }
5621
5622
    /**
     * Ensures that the string begins with $substring. If it doesn't, it's
     * prepended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        // the empty-string guard comes first so that "strpos" is never called with an empty needle
        $alreadyPrefixed = $substring !== '' && \strpos($str, $substring) === 0;

        return $alreadyPrefixed ? $str : $substring . $str;
    }
5643
5644
    /**
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        // the tail comparison is only evaluated when both strings are non-empty
        $needsSuffix = $str === ''
            || $substring === ''
            || \substr($str, -\strlen($substring)) !== $substring;

        return $needsSuffix ? $str . $substring : $str;
    }
5666
5667
    /**
     * Capitalizes the first word of the string, replaces underscores with
     * spaces, and strips '_id'.
     *
     * Every occurrence of "_id" is removed first, then each remaining "_"
     * becomes a space; the result is trimmed and passed to ucfirst().
     *
     * @param string $str
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        // order matters: "_id" has to be removed before the single "_" is replaced
        $replacements = [
            '_id' => '',
            '_'   => ' ',
        ];

        $str = \str_replace(
            \array_keys($replacements),
            \array_values($replacements),
            $str
        );

        return self::ucfirst(\trim($str));
    }
5691
5692
    /**
     * Check if the string ends with the given substring, case insensitive.
     *
     * Returns false when either argument is an empty string.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // take as many bytes from the end as the needle has, then compare ignoring case
        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
5708
5709
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * Each candidate is checked with str_iends_with(); the first match ends
     * the search. An empty list never matches.
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with $substring
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        foreach ($substrings as $candidate) {
            if (self::str_iends_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
5733
5734
    /**
     * Returns the index of the first occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search.
     *
     * Thin alias: all arguments are handed to stripos() unchanged.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::stripos($str, $needle, $offset, $encoding);
    }
5760
5761
    /**
     * Returns the index of the last occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. Offsets may be negative to count from the last character
     * in the string.
     *
     * Thin alias: all arguments are handed to strripos() unchanged.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strripos($str, $needle, $offset, $encoding);
    }
5788
5789
    /**
     * Returns the index of the first occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search.
     *
     * Thin alias: all arguments are handed to strpos() unchanged.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strpos($str, $needle, $offset, $encoding);
    }
5815
5816
    /**
     * Returns the index of the last occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. Offsets may be negative to count from the last character
     * in the string.
     *
     * Thin alias: all arguments are handed to strrpos() unchanged.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strrpos($str, $needle, $offset, $encoding);
    }
5843
5844
    /**
     * Inserts $substring into the string at the $index provided.
     *
     * If $index is greater than the length of $str, the input is returned
     * unchanged.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            // generic path: go through the encoding-aware helpers of this class
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $length = (int) self::strlen($str, $encoding);
            if ($index > $length) {
                return $str;
            }

            $head = (string) self::substr($str, 0, $index, $encoding);
            $tail = (string) self::substr($str, $index, $length, $encoding);

            return $head . $substring . $tail;
        }

        // fast path for the literal default 'UTF-8': call "mb_*" directly
        $length = (int) \mb_strlen($str);
        if ($index > $length) {
            return $str;
        }

        $head = (string) \mb_substr($str, 0, $index);
        $tail = (string) \mb_substr($str, $index, $length);

        return $head . $substring . $tail;
    }
5883
5884
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * Each search value is quoted and turned into a case-insensitive UTF-8
     * regex, so it is matched literally. Replacement values are escaped as
     * well, so "$1", "\1" or "${1}" in a replacement are inserted as-is and
     * are not treated as regex back-references.
     *
     * @see  http://php.net/manual/en/function.str-ireplace.php
     *
     * @param mixed $search  <p>
     *                       Every replacement with search array is
     *                       performed on the result of previous replacement.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value(s), used literally.
     *                       </p>
     * @param mixed $subject <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] <p>
     *                       The number of matched and replaced needles will
     *                       be returned in count which is passed by
     *                       reference.
     *                       </p>
     *
     * @return mixed a string or an array of replacements
     */
    public static function str_ireplace($search, $replace, $subject, &$count = null)
    {
        $patterns = [];
        foreach ((array) $search as $key => $needle) {
            $needle = (string) $needle;

            // an empty needle becomes a pattern that can never match
            $patterns[$key] = $needle === ''
                ? '/^(?<=.)$/'
                : '/' . \preg_quote($needle, '/') . '/ui';
        }

        // neutralise "\" and "$" so that "preg_replace" inserts the replacement text literally
        $escape = static function ($value): string {
            return \addcslashes((string) $value, '\\$');
        };
        $replacement = \is_array($replace) ? \array_map($escape, $replace) : $escape($replace);

        return \preg_replace($patterns, $replacement, $subject, -1, $count);
    }
5928
5929
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * - case-insensitive (via "stripos")
     * - an empty $search appends $replacement to $str
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            // nothing to look for: the replacement is simply appended
            return $str . $replacement;
        }

        if ($str === '') {
            // a non-empty search can never match an empty input
            return '';
        }

        $position = \stripos($str, $search);
        if ($position !== 0) {
            return $str;
        }

        return $replacement . \substr($str, \strlen($search));
    }
5960
5961
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * - case-insensitive (via "stripos")
     * - an empty $search appends $replacement to $str
     * - a $search that is longer than $str can never match; $str is returned unchanged
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            // nothing to look for: the replacement is simply appended
            return $str . $replacement;
        }

        // byte offset at which $search would have to start in order to be the ending of $str
        $offset = \strlen($str) - \strlen($search);

        // a negative offset means that $search is longer than $str; handing such an offset
        // to "stripos" can fall outside of the string (warning in PHP 7, ValueError in PHP 8)
        if ($offset < 0) {
            return $str;
        }

        if (\stripos($str, $search, $offset) !== false) {
            return \substr($str, 0, $offset) . $replacement;
        }

        return $str;
    }
5992
5993
    /**
     * Check if the string starts with the given substring, case insensitive.
     *
     * Returns false when either argument is an empty string.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        $position = self::stripos($haystack, $needle);

        return $position === 0;
    }
6009
6010
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * Each candidate is checked with str_istarts_with(); the first match ends
     * the search. An empty $str or an empty list never matches.
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with $substring
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_istarts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
6038
6039
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * - case-insensitive
     * - returns an empty string if $str or $separator is empty, or if the
     *   separator does not occur in $str
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // the position has to be computed with the same encoding that is used to cut the string below
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6073
6074
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * - case-insensitive
     * - returns an empty string if $str or $separator is empty, or if the
     *   separator does not occur in $str
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // the position has to be computed with the same encoding that is used to cut the string below
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6108
6109
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * - case-insensitive
     * - returns an empty string if $str or $separator is empty, or if the
     *   separator does not occur in $str
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // the position has to be computed with the same encoding that is used to cut the string below
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6135
6136
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * - case-insensitive
     * - returns an empty string if $str or $separator is empty, or if the
     *   separator does not occur in $str
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        // the literal default 'UTF-8' uses the "mb_*" functions directly, everything else the class helpers
        $isUtf8 = $encoding === 'UTF-8';

        $offset = $isUtf8
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);

        if ($offset === false) {
            return '';
        }

        if ($isUtf8) {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6167
6168
    /**
     * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
     *
     * The lookup is delegated to stristr(); a false result from it (and an
     * empty $str or $needle) is reported as an empty string.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $part = self::stristr($str, $needle, $beforeNeedle, $encoding);

        return $part === false ? '' : $part;
    }
6204
6205
    /**
     * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
     *
     * The lookup is delegated to strrichr(); a false result from it (and an
     * empty $str or $needle) is reported as an empty string.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $part = self::strrichr($str, $needle, $beforeNeedle, $encoding);

        return $part === false ? '' : $part;
    }
6236
6237
    /**
     * Returns the last $n characters of the string.
     *
     * An empty $str or a $n of zero or less yields an empty string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        if ($n <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            // generic path: normalize the charset name and use the encoding-aware helper
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $encoding);
        }

        // a negative start counts characters from the end of the string
        return (string) \mb_substr($str, -$n);
    }
6260
6261
    /**
     * Limit the number of characters in a string.
     *
     * If $str is longer than $length it is cut so that the kept part plus
     * $strAddOn is $length characters long. If $strAddOn alone is already as
     * long as (or longer than) $length, nothing of $str is kept and only
     * $strAddOn is returned.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $strAddOn = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // never negative: a negative length would make "mb_substr" trim from the end instead of truncating
            $keep = \max(0, $length - (int) self::strlen($strAddOn));

            return ((string) \mb_substr($str, 0, $keep)) . $strAddOn;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // measure the add-on with the same encoding that is used for the input string
        $keep = \max(0, $length - (int) self::strlen($strAddOn, $encoding));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $strAddOn;
    }
6298
6299
    /**
     * Limit the number of characters in a string without cutting inside a word.
     *
     * The string is cut at $length characters and then shortened to the last space
     * inside that part. If that part contains no usable space, the first
     * ($length - 1) characters are kept instead. $strAddOn is appended whenever the
     * string was shortened.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $strAddOn = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($length <= 0 || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            /** @noinspection UnnecessaryCastingInspection */
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // the limit falls exactly on a space: drop that space and stop here
            if (\mb_substr($str, $length - 1, 1) === ' ') {
                return ((string) \mb_substr($str, 0, $length - 1)) . $strAddOn;
            }

            $cut = \mb_substr($str, 0, $length);

            // a negative limit makes "explode()" omit the last (possibly incomplete) word
            $new_str = \implode(' ', \explode(' ', $cut, -1));

            if ($new_str === '') {
                return ((string) \mb_substr($cut, 0, $length - 1)) . $strAddOn;
            }

            return $new_str . $strAddOn;
        }

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // the limit falls exactly on a space: drop that space and stop here
        if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
            return ((string) self::substr($str, 0, $length - 1, $encoding)) . $strAddOn;
        }

        $cut = self::substr($str, 0, $length, $encoding);
        if ($cut === false) {
            return $strAddOn;
        }

        // a negative limit makes "explode()" omit the last (possibly incomplete) word
        $new_str = \implode(' ', \explode(' ', $cut, -1));

        if ($new_str === '') {
            return ((string) self::substr($cut, 0, $length - 1, $encoding)) . $strAddOn;
        }

        return $new_str . $strAddOn;
    }
6363
6364
    /**
     * Returns the longest common prefix between the string and $otherStr.
     *
     * Both strings are compared character by character from the start; the
     * comparison stops at the first position where they differ.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shared prefix, or an empty string if there is none.</p>
     */
    public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        // init
        $prefix = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            // only positions that exist in both strings can match
            $limit = (int) \min(
                self::strlen($str, $encoding),
                self::strlen($otherStr, $encoding)
            );

            for ($pos = 0; $pos < $limit; ++$pos) {
                $current = self::substr($str, $pos, 1, $encoding);

                if (
                    $current === false
                    ||
                    $current !== self::substr($otherStr, $pos, 1, $encoding)
                ) {
                    break;
                }

                $prefix .= $current;
            }

            return $prefix;
        }

        // only positions that exist in both strings can match
        $limit = (int) \min(
            \mb_strlen($str),
            \mb_strlen($otherStr)
        );

        for ($pos = 0; $pos < $limit; ++$pos) {
            $current = \mb_substr($str, $pos, 1);

            if (
                $current === false
                ||
                $current !== \mb_substr($otherStr, $pos, 1)
            ) {
                break;
            }

            $prefix .= $current;
        }

        return $prefix;
    }
6422
6423
    /**
     * Returns the longest common substring between the string and $otherStr.
     * In the case of ties, it returns that which occurs first in $str.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The longest common substring, or an empty string if there is none.</p>
     */
    public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $otherStr === '') {
            return '';
        }

        // Uses dynamic programming to solve
        // http://en.wikipedia.org/wiki/Longest_common_substring_problem

        if ($encoding === 'UTF-8') {
            $strLength = (int) \mb_strlen($str);
            $otherLength = (int) \mb_strlen($otherStr);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $strLength = (int) self::strlen($str, $encoding);
            $otherLength = (int) self::strlen($otherStr, $encoding);
        }

        // Return if either string is empty
        if ($strLength === 0 || $otherLength === 0) {
            return '';
        }

        // evaluated after the normalization, so an alias that normalizes to "UTF-8" uses the "mb_" path as well
        $isUtf8 = $encoding === 'UTF-8';

        // extract the characters of $otherStr once, instead of once per table cell
        $otherChars = [];
        for ($j = 0; $j < $otherLength; ++$j) {
            $otherChars[$j] = $isUtf8
                ? \mb_substr($otherStr, $j, 1)
                : self::substr($otherStr, $j, 1, $encoding);
        }

        $len = 0; // length of the best match found so far
        $end = 0; // position in $str just after the best match

        // Each cell only depends on the previous row, so two rows replace the full table.
        $previousRow = \array_fill(0, $otherLength + 1, 0);

        for ($i = 1; $i <= $strLength; ++$i) {
            // loop-invariant for the inner loop
            $strChar = $isUtf8
                ? \mb_substr($str, $i - 1, 1)
                : self::substr($str, $i - 1, 1, $encoding);

            $currentRow = [0];

            for ($j = 1; $j <= $otherLength; ++$j) {
                if ($strChar === $otherChars[$j - 1]) {
                    $currentRow[$j] = $previousRow[$j - 1] + 1;

                    // strictly greater: the first of several equally long matches wins
                    if ($currentRow[$j] > $len) {
                        $len = $currentRow[$j];
                        $end = $i;
                    }
                } else {
                    $currentRow[$j] = 0;
                }
            }

            $previousRow = $currentRow;
        }

        if ($isUtf8) {
            return (string) \mb_substr($str, $end - $len, $len);
        }

        return (string) self::substr($str, $end - $len, $len, $encoding);
    }
6507
6508
    /**
     * Returns the longest common suffix between the string and $otherStr.
     *
     * Both strings are compared character by character from the end; the
     * comparison stops at the first position where they differ.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shared suffix, or an empty string if there is none.</p>
     */
    public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        if ($otherStr === '' || $str === '') {
            return '';
        }

        // init
        $suffix = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            // only positions that exist in both strings can match
            $limit = (int) \min(
                self::strlen($str, $encoding),
                self::strlen($otherStr, $encoding)
            );

            // a negative start addresses the characters from the end
            for ($back = 1; $back <= $limit; ++$back) {
                $current = self::substr($str, -$back, 1, $encoding);

                if (
                    $current === false
                    ||
                    $current !== self::substr($otherStr, -$back, 1, $encoding)
                ) {
                    break;
                }

                $suffix = $current . $suffix;
            }

            return $suffix;
        }

        // only positions that exist in both strings can match
        $limit = (int) \min(
            \mb_strlen($str, $encoding),
            \mb_strlen($otherStr, $encoding)
        );

        // a negative start addresses the characters from the end
        for ($back = 1; $back <= $limit; ++$back) {
            $current = \mb_substr($str, -$back, 1);

            if (
                $current === false
                ||
                $current !== \mb_substr($otherStr, -$back, 1)
            ) {
                break;
            }

            $suffix = $current . $suffix;
        }

        return $suffix;
    }
6569
6570
    /**
     * Returns true if $str matches the supplied pattern, false otherwise.
     *
     * The pattern is embedded as-is between "/" delimiters and evaluated with the
     * "u" modifier, so it must not contain an unescaped "/".
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against (without delimiters).</p>
     *
     * @return bool
     *              <p>Whether or not $str matches the pattern.</p>
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        $regex = '/' . $pattern . '/u';

        // "preg_match()" yields 1 on a match, 0 on no match and false on error
        return \preg_match($regex, $str) === 1;
    }
6582
6583
    /**
     * Returns whether or not a character exists at an index. Offsets may be
     * negative to count from the last character in the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool
     *              <p>Whether or not the index exists.</p>
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        // init
        $chars = (int) self::strlen($str, $encoding);

        // negative offsets count from the end: -1 is the last character
        if ($offset < 0) {
            return \abs($offset) <= $chars;
        }

        return $offset < $chars;
    }
6605
6606
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Throws an
     * OutOfBoundsException if the index does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string
     *                <p>The character at the specified index.</p>
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // The bounds check has to use the same encoding as the lookup below, otherwise
        // the length is measured in the wrong charset for non-default encodings.
        if (!self::str_offset_exists($str, $index, $encoding)) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6635
6636
    /**
     * Pad a string to the given length with another string.
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of "left", "right" or "both"
     *
     * @return string
     *                <p>The padded string; $str unchanged if it is already longer than $pad_length,
     *                or if $pad_length is 0 or $pad_string is empty.</p>
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        // translate the string aliases into the native "STR_PAD_*" constants
        if ($pad_type !== (int) $pad_type) {
            $aliases = [
                'left'  => \STR_PAD_LEFT,
                'right' => \STR_PAD_RIGHT,
                'both'  => \STR_PAD_BOTH,
            ];

            if (!\is_string($pad_type) || !isset($aliases[$pad_type])) {
                throw new \InvalidArgumentException(
                    'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }

            $pad_type = $aliases[$pad_type];
        }

        // the "mb_" fast path is only taken for the literal 'UTF-8'
        $isUtf8 = $encoding === 'UTF-8';

        if ($isUtf8) {
            $str_length = (int) \mb_strlen($str);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $str_length = (int) self::strlen($str, $encoding);
        }

        // the string is already longer than requested
        if ($pad_length < $str_length) {
            return $str;
        }

        // cuts the repeated pad string down to the exact number of characters
        $firstChars = static function (string $padding, int $chars) use ($isUtf8, $encoding): string {
            if ($isUtf8) {
                return (string) \mb_substr($padding, 0, $chars);
            }

            return (string) self::substr($padding, 0, $chars, $encoding);
        };

        $diff = $pad_length - $str_length;
        $pre = '';
        $post = '';

        if ($pad_type === \STR_PAD_BOTH) {
            // with an odd difference the extra character goes to the right side
            $ps_length_left = (int) \floor($diff / 2);
            $ps_length_right = (int) \ceil($diff / 2);

            $pre = $firstChars(\str_repeat($pad_string, $ps_length_left), $ps_length_left);
            $post = $firstChars(\str_repeat($pad_string, $ps_length_right), $ps_length_right);
        } else {
            $ps_length = $isUtf8
                ? (int) \mb_strlen($pad_string)
                : (int) self::strlen($pad_string, $encoding);

            // repeat the pad string often enough to cover the difference, then cut to size
            $padding = $firstChars(
                \str_repeat($pad_string, (int) \ceil($diff / $ps_length)),
                $diff
            );

            // every integer other than STR_PAD_LEFT / STR_PAD_BOTH pads on the right
            if ($pad_type === \STR_PAD_LEFT) {
                $pre = $padding;
            } else {
                $post = $padding;
            }
        }

        return $pre . $str . $post;
    }
6798
6799
    /**
     * Returns a new string of a given length such that both sides of the
     * string are padded. Alias for str_pad() with a $pad_type of STR_PAD_BOTH.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>String with padding applied.</p>
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $padStr, \STR_PAD_BOTH, $encoding);

        return $padded;
    }
6818
6819
    /**
     * Returns a new string of a given length such that the beginning of the
     * string is padded. Alias for str_pad() with a $pad_type of STR_PAD_LEFT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>String with left padding.</p>
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $padStr, \STR_PAD_LEFT, $encoding);

        return $padded;
    }
6838
6839
    /**
     * Returns a new string of a given length such that the end of the string
     * is padded. Alias for str_pad() with a $pad_type of STR_PAD_RIGHT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>String with right padding.</p>
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $padStr, \STR_PAD_RIGHT, $encoding);

        return $padded;
    }
6858
6859
    /**
     * Repeat a string.
     *
     * The input is passed through self::filter() before it is repeated.
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of times the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @return string
     *                <p>The repeated string.</p>
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
6883
6884
    /**
     * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param mixed $search  <p>
     *                       The value being searched for, otherwise known as the needle.
     *                       An array may be used to designate multiple needles.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value that replaces found search
     *                       values. An array may be used to designate multiple replacements.
     *                       </p>
     * @param mixed $subject <p>
     *                       The string or array being searched and replaced on,
     *                       otherwise known as the haystack.
     *                       </p>
     *                       <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles
     *
     * @return mixed
     *               <p>A string or an array with the replaced values.</p>
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        /**
         * @psalm-suppress PossiblyNullArgument
         */
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
6929
6930
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * The comparison is done on bytes. If $search is empty, $replacement is
     * appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string
     *                <p>String after the replacement; $str unchanged if it does not start with $search.</p>
     */
    public static function str_replace_beginning(string $str, string $search, string $replacement): string
    {
        // An empty search term always results in "$str . $replacement"
        // (which is just $replacement for an empty $str).
        if ($search === '') {
            return $str . $replacement;
        }

        $searchLength = \strlen($search);

        // $str starts with $search: swap that leading part for the replacement
        if (\strncmp($str, $search, $searchLength) === 0) {
            return $replacement . \substr($str, $searchLength);
        }

        return $str;
    }
6961
6962
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * The comparison is done on bytes. If $search is empty, $replacement is
     * appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string
     *                <p>String after the replacement; $str unchanged if it does not end with $search.</p>
     */
    public static function str_replace_ending(string $str, string $search, string $replacement): string
    {
        // An empty search term always results in "$str . $replacement"
        // (which is just $replacement for an empty $str).
        if ($search === '') {
            return $str . $replacement;
        }

        $strLength = \strlen($str);
        $searchLength = \strlen($search);

        // A search term longer than the string can never match. Checking this up-front
        // avoids an out-of-range negative offset, which "strpos()" rejects.
        if ($searchLength > $strLength) {
            return $str;
        }

        $tailStart = $strLength - $searchLength;

        // $str ends with $search: swap that trailing part for the replacement
        if (\substr($str, $tailStart) === $search) {
            return \substr($str, 0, $tailStart) . $replacement;
        }

        return $str;
    }
6993
6994
    /**
     * Replace the first "$search"-term with the "$replace"-term.
     *
     * The position is looked up via self::strpos() and the replacement is done
     * via self::substr_replace().
     *
     * @param string $search  <p>The string to search for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string being searched and replaced on.</p>
     *
     * @return string
     *                <p>The string after the replacement; $subject unchanged if $search was not found.</p>
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(string $search, string $replace, string $subject): string
    {
        $firstPos = self::strpos($subject, $search);

        // not found: nothing to replace
        if ($firstPos === false) {
            return $subject;
        }

        $searchLength = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $firstPos, $searchLength);
    }
7023
7024
    /**
     * Replace the last "$search"-term with the "$replace"-term.
     *
     * The position is looked up via self::strrpos() and the replacement is done
     * via self::substr_replace().
     *
     * @param string $search  <p>The string to search for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string being searched and replaced on.</p>
     *
     * @return string
     *                <p>The string after the replacement; $subject unchanged if $search was not found.</p>
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $lastPos = self::strrpos($subject, $search);

        // not found: nothing to replace
        if ($lastPos === false) {
            return $subject;
        }

        $searchLength = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $lastPos, $searchLength);
    }
7055
7056
    /**
     * Shuffles all the characters in the string.
     *
     * PS: uses "shuffle()", a random algorithm which is weak for cryptography purposes
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shuffled string.</p>
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        // init
        $shuffled = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            // shuffle the character positions, then rebuild the string in that order
            $order = \range(0, (int) self::strlen($str, $encoding) - 1);
            /** @noinspection NonSecureShuffleUsageInspection */
            \shuffle($order);

            foreach ($order as $position) {
                $piece = self::substr($str, $position, 1, $encoding);
                if ($piece !== false) {
                    $shuffled .= $piece;
                }
            }

            return $shuffled;
        }

        // shuffle the character positions, then rebuild the string in that order
        $order = \range(0, (int) \mb_strlen($str) - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($order);

        foreach ($order as $position) {
            $piece = \mb_substr($str, $position, 1);
            if ($piece !== false) {
                $shuffled .= $piece;
            }
        }

        return $shuffled;
    }
7102
7103
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. Negative values of $start and $end are computed
     * from the end of the string.
     *
     * @param string $str
     * @param int    $start    <p>Initial index from which to begin extraction.</p>
     * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      <p>The extracted substring (an empty string if $end does not lie behind $start).</p>
     *                      <p>Whatever the underlying "substr" call returns is passed through, which may be
     *                      <b>FALSE</b> if <i>str</i> is shorter than <i>start</i> characters.</p>
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        $isUtf8 = $encoding === 'UTF-8';
        if ($isUtf8) {
            $strLength = (int) \mb_strlen($str);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $strLength = (int) self::strlen($str, $encoding);
        }

        if ($end === null) {
            // no end index: take everything from $start on
            $length = $strLength;
        } else {
            // resolve both indexes to absolute positions *before* computing the
            // length; otherwise a negative $start, or a negative $end that lies
            // in front of $start, yields a wrong (or negative) length
            $from = $start < 0 ? \max(0, $strLength + $start) : $start;
            $to = $end < 0 ? $strLength + $end : $end;

            if ($to <= $from) {
                return '';
            }

            $start = $from;
            $length = $to - $from;
        }

        if ($isUtf8) {
            return \mb_substr($str, $start, $length);
        }

        return self::substr($str, $start, $length, $encoding);
    }
7152
7153
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Dashes and whitespace become "_", every upper-case letter is lower-cased
     * and prefixed with "_", every digit is wrapped in "_", and runs of "_" are
     * collapsed and trimmed from both ends.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string in snake_case
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // NOTE: no "|" inside the character class, it would be matched literally
            '/([\d\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];

                // digits are isolated with "_" on both sides
                if ((string) (int) $match === $match) {
                    return '_' . $match . '_';
                }

                // upper-case letters start a new "_"-separated word
                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        $str = (string) \preg_replace(
            [
                '/\s+/u', // convert spaces to "_"
                '/_+/',   // remove double "_"
            ],
            [
                '_',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
7217
7218
    /**
     * Sort all characters of a string by their code points.
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @return string string of sorted characters
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $codePoints = self::codepoints($str);

        if ($unique) {
            // flipping twice removes duplicated code points
            $codePoints = \array_flip(\array_flip($codePoints));
        }

        if (!$desc) {
            \asort($codePoints);
        } else {
            \arsort($codePoints);
        }

        return self::string($codePoints);
    }
7243
7244
    /**
     * Alias of "UTF8::str_split()": all arguments are forwarded unchanged.
     *
     * @see UTF8::str_split()
     *
     * @param string|string[] $str
     * @param int             $length
     * @param bool            $cleanUtf8
     *
     * @return string[]
     */
    public static function split($str, int $length = 1, bool $cleanUtf8 = false): array
    {
        return self::str_split($str, $length, $cleanUtf8);
    }
7262
7263
    /**
     * Splits the string with the provided pattern, returning an array of
     * strings. An optional integer $limit will truncate the results.
     *
     * NOTE(review): with "mbstring" the pattern is handed to "mb_split()" as a
     * regular expression, while the fallback escapes it via "preg_quote()" and
     * therefore treats it as a literal string - confirm which one is intended.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @return string[] an array of strings
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $parts = \mb_split($pattern, $str);

            // "mb_split()" reports failures via false: mirror the fallback below
            if ($parts === false) {
                return [];
            }

            // $limit is never 0 here: positive === truncate, negative === no limit
            return $limit > 0 ? \array_slice($parts, 0, $limit) : $parts;
        }

        // ask for one extra element, so that the unsplit rest can be dropped below
        if ($limit > 0) {
            ++$limit;
        } else {
            $limit = -1;
        }

        $array = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);

        if ($array === false) {
            return [];
        }

        if ($limit > 0 && \count($array) === $limit) {
            \array_pop($array);
        }

        return $array;
    }
7324
7325
    /**
     * Check if the string starts with the given substring.
     *
     * - case-sensitive, byte-wise comparison
     * - an empty $needle always matches (same as the native PHP 8 function)
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        // "strpos()" rejects an empty needle before PHP 8 and accepts it since,
        // so decide this case explicitly to get the same result everywhere
        if ($needle === '') {
            return true;
        }

        // only compare the prefix instead of searching the whole haystack
        return \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
7337
7338
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty $str or an empty list of $substrings never matches
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with $substring
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // iterate by value: nothing is modified, so no reference is needed
        foreach ($substrings as $substring) {
            if (self::str_starts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
7366
7367
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // use the class wrapper (like the sibling "*_separator" methods) instead
        // of calling "\mb_substr()" directly with the caller's encoding name
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
7406
7407
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $isUtf8 = $encoding === 'UTF-8';

        $separatorPos = $isUtf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);

        if ($separatorPos === false) {
            return '';
        }

        // continue right behind the separator
        if ($isUtf8) {
            return (string) \mb_substr($str, $separatorPos + (int) \mb_strlen($separator));
        }

        return (string) self::substr(
            $str,
            $separatorPos + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
7446
7447
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $separatorPos = self::strpos($str, $separator, 0, $encoding);

            return $separatorPos === false
                ? ''
                : (string) self::substr($str, 0, $separatorPos, $encoding);
        }

        $separatorPos = \mb_strpos($str, $separator);

        return $separatorPos === false
            ? ''
            : (string) \mb_substr($str, 0, $separatorPos);
    }
7490
7491
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $separatorPos = \mb_strrpos($str, $separator);

            return $separatorPos === false
                ? ''
                : (string) \mb_substr($str, 0, $separatorPos);
        }

        // the lookup receives the encoding as given ...
        $separatorPos = self::strrpos($str, $separator, 0, $encoding);
        if ($separatorPos === false) {
            return '';
        }

        // ... the extraction receives the normalized encoding name
        return (string) self::substr(
            $str,
            0,
            $separatorPos,
            self::normalize_encoding($encoding, 'UTF-8')
        );
    }
7533
7534
    /**
     * Gets the part of the string from the first occurrence of "$needle" on
     * (needle included), or the part in front of it via "$beforeNeedle".
     *
     * An empty string is returned if $str or $needle is empty, or if the
     * needle does not occur in $str.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // "false" is also the default of the third "mb_strstr()" parameter,
        // so the flag can simply be passed through
        $found = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $beforeNeedle)
            : self::strstr($str, $needle, $beforeNeedle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
7578
7579
    /**
     * Gets the part of the string from the last occurrence of "$needle" on
     * (needle included), or the part in front of it via "$beforeNeedle".
     *
     * An empty string is returned if $str or $needle is empty, or if the
     * needle does not occur in $str.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // "false" is also the default of the third "mb_strrchr()" parameter,
        // so the flag can simply be passed through
        $found = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $beforeNeedle)
            : self::strrchr($str, $needle, $beforeNeedle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
7623
7624
    /**
     * Wraps $str into the given substring.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @return string string with the substring both prepended and appended
     */
    public static function str_surround(string $str, string $substring): string
    {
        return "{$substring}{$str}{$substring}";
    }
7636
7637
    /**
     * Returns the string with the first letter of each word capitalized and
     * the rest of each word lower-cased. A "word" is every run of
     * non-whitespace characters. Words listed in $ignore are left untouched.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                [optional] <p>An array of words not to capitalize or null.
     *                                                   Default: null</p>
     * @param string              $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null         $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool                $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     * @param bool                $useTrimFirst          [optional] <p>true === trim the input string, first</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false,
        bool $useTrimFirst = true
    ): string {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($useTrimFirst === true) {
            $str = \trim($str);
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // the plain "mb_*" functions are sufficient as long as no language
        // specific or length preserving case mapping was requested
        $useMbFunction = $lang === null && $tryToKeepStringLength === false;

        $titleizeWord = static function (array $hit) use ($tryToKeepStringLength, $lang, $ignore, $useMbFunction, $encoding): string {
            $word = $hit[0];

            if ($ignore !== null && \in_array($word, $ignore, true)) {
                return $word;
            }

            if ($useMbFunction !== true) {
                $lowered = self::strtolower($word, $encoding, false, $lang, $tryToKeepStringLength);

                return self::ucfirst($lowered, $encoding, false, $lang, $tryToKeepStringLength);
            }

            if ($encoding === 'UTF-8') {
                $head = \mb_strtoupper(\mb_substr($word, 0, 1));
                $tail = \mb_strtolower(\mb_substr($word, 1));

                return $head . $tail;
            }

            $head = \mb_strtoupper(\mb_substr($word, 0, 1, $encoding), $encoding);
            $tail = \mb_strtolower(\mb_substr($word, 1, null, $encoding), $encoding);

            return $head . $tail;
        };

        return (string) \preg_replace_callback('/([\S]+)/u', $titleizeWord, $str);
    }
7712
7713
    /**
     * Returns a trimmed string in proper title case.
     *
     * Small words (a, an, and, of, ...) stay lower-case unless they start or
     * end the title, a sub-sentence or a hyphenated compound. URLs, domains,
     * emails, file paths and words with internal capitals are preserved. The
     * entries of $ignore are appended to the list of small words.
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
    {
        // the entries are regex fragments, they are joined into one alternation
        $defaultSmallWords = [
            '(?<!q&)a',
            'an',
            'and',
            'as',
            'at(?!&t)',
            'but',
            'by',
            'en',
            'for',
            'if',
            'in',
            'of',
            'on',
            'or',
            'the',
            'to',
            'v[.]?',
            'via',
            'vs[.]?',
        ];

        $smallWordsRx = \implode('|', \array_merge($defaultSmallWords, $ignore));
        $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

        /**
         * Upper-case the first character of capture group 1.
         *
         * @param string[] $matches
         *
         * @return string
         */
        $upperFirstOfGroupOne = static function (array $matches) use ($encoding): string {
            return static::str_upper_first($matches[1], $encoding);
        };

        /**
         * Keep capture group 1 and upper-case the first character of group 2.
         *
         * @param string[] $matches
         *
         * @return string
         */
        $upperFirstOfGroupTwo = static function (array $matches) use ($encoding): string {
            return $matches[1] . static::str_upper_first($matches[2], $encoding);
        };

        $str = \trim($str);

        // input without any lower-case character is lower-cased first
        if (self::has_lowercase($str) === false) {
            $str = self::strtolower($str, $encoding);
        }

        // the main substitutions
        $str = (string) \preg_replace_callback(
            '~\b (_*) (?:                                                              # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \b                                                           # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                if ($matches[2]) {
                    // preserve URLs, domains, emails and file paths
                    $word = $matches[2];
                } elseif ($matches[3]) {
                    // lower-case small words
                    $word = self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // capitalize word w/o internal caps
                    $word = static::str_upper_first($matches[4], $encoding);
                } else {
                    // preserve other kinds of word (iPhone)
                    $word = $matches[5];
                }

                // re-attach the leading (1) and the trailing (6) underscores
                return $matches[1] . $word . $matches[6];
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \A [[:punct:]]*                # start of title...
                      |  [:.;?!][ ]+               # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )       # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \b # ...followed by small word
                     ~uxi',
            $upperFirstOfGroupTwo,
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi',
            $upperFirstOfGroupOne,
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            $upperFirstOfGroupOne,
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        // NOTE(review): the lookbehind below tests for "…" although its inline
        // comment talks about a hyphen - left untouched, please confirm the intent.
        $str = (string) \preg_replace_callback(
            '~\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi',
            $upperFirstOfGroupTwo,
            $str
        );

        return $str;
    }
7882
7883
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of the string are read as one big-endian number, which is
     * returned in base 2 without leading zeros ("0" for an empty string).
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function str_to_binary(string $str): string
    {
        // convert byte by byte: "base_convert()" on the whole hex dump loses
        // precision as soon as the number does not fit into a float anymore
        $bits = '';
        $byteCount = \strlen($str);
        for ($i = 0; $i < $byteCount; ++$i) {
            $bits .= \sprintf('%08b', \ord($str[$i]));
        }

        // same output format as "base_convert()": no leading zeros, "0" for zero
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
7896
7897
    /**
     * Split a string into an array of lines.
     *
     * NOTE(review): the separator is "one or two CR / LF characters", so two
     * consecutive line feeds count as one separator and the blank line
     * in-between is dropped - confirm that this is intended.
     *
     * @param string   $str
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
    {
        // result for "nothing to split" and for a failed split
        $emptyResult = $removeEmptyValues === true ? [] : [''];

        if ($str === '') {
            return $emptyResult;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split('[\r\n]{1,2}', $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        if ($lines === false) {
            return $emptyResult;
        }

        $filterRequested = $removeShortValues !== null || $removeEmptyValues !== false;
        if (!$filterRequested) {
            return $lines;
        }

        return self::reduce_string_array(
            $lines,
            $removeEmptyValues,
            $removeShortValues
        );
    }
7935
7936
    /**
     * Convert a string into an array of words.
     *
     * INFO: the string is split with "PREG_SPLIT_DELIM_CAPTURE", so the unfiltered
     * result alternates between the text between the words (even keys) and
     * the words themselves (odd keys).
     *
     * @param string   $str
     * @param string   $charList          <p>Additional chars for the definition of "words".</p>
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $charList = '',
        bool $removeEmptyValues = false,
        int $removeShortValues = null
    ): array {
        // result for "nothing to split" (empty input or a failed split)
        $fallback = $removeEmptyValues === true ? [] : [''];

        if ($str === '') {
            return $fallback;
        }

        $charList = self::rxClass($charList, '\pL');

        $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $fallback;
        }

        // no filter requested => return the raw split result
        if ($removeEmptyValues === false && $removeShortValues === null) {
            return $parts;
        }

        $reduced = self::reduce_string_array($parts, $removeEmptyValues, $removeShortValues);

        // make sure that every remaining value is a string
        foreach ($reduced as $key => $value) {
            $reduced[$key] = (string) $value;
        }

        return $reduced;
    }
7983
7984
    /**
     * Alias of "UTF8::to_ascii()": all arguments are passed through unchanged.
     *
     * @see UTF8::to_ascii()
     *
     * @param string $str
     * @param string $unknown
     * @param bool   $strict
     *
     * @return string
     */
    public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
7999
8000
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * INFO: if there is no room left for at least one character of $str
     * (e.g. $substring is as long as or longer than $length, or $length is
     * not positive), only $substring is returned, like "UTF8::str_truncate_safe()" does.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // fast path: use "mb_*" directly
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                $length -= (int) \mb_strlen($substring);
            }

            // a negative length would make "mb_substr()" cut from the end of
            // the string and the result would exceed the desired length
            if ($length <= 0) {
                return $substring;
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length) . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            $length -= (int) self::strlen($substring, $encoding);
        }

        // same guard as in the UTF-8 fast path
        if ($length <= 0) {
            return $substring;
        }

        return (string) self::substr($str, 0, $length, $encoding) . $substring;
    }
8057
8058
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * INFO: only the space character (" ") is treated as a word boundary.
     *
     * @param string $str
     * @param int    $length                          <p>Desired length of the truncated string.</p>
     * @param string $substring                       [optional] <p>The substring to append if it can fit. Default:
     *                                                ''</p>
     * @param string $encoding                        [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignoreDoNotSplitWordsForOneWord [optional] <p>Default: false</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignoreDoNotSplitWordsForOneWord = false
    ): string {
        if ($length <= 0 || $str === '') {
            return $substring;
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            if ($length >= (int) self::strlen($str, $encoding)) {
                return $str;
            }

            // reserve room for the substring that will be appended
            $length -= (int) self::strlen($substring, $encoding);
            if ($length <= 0) {
                return $substring;
            }

            $cut = self::substr($str, 0, $length, $encoding);
            if ($cut === false) {
                return '';
            }

            // a space directly after the cut means that no word was split
            $nextSpace = self::strpos($str, ' ', $length - 1, $encoding);
            if ($nextSpace !== $length) {
                // go back to the last space within the truncated part
                $lastSpace = self::strrpos($cut, ' ', 0, $encoding);

                $dropSplitWord = $lastSpace !== false
                                 ||
                                 ($nextSpace !== false && $ignoreDoNotSplitWordsForOneWord === false);
                if ($dropSplitWord) {
                    $cut = (string) self::substr($cut, 0, (int) $lastSpace, $encoding);
                }
            }

            return $cut . $substring;
        }

        // fast path for UTF-8: use "mb_*" directly

        if ($length >= (int) \mb_strlen($str)) {
            return $str;
        }

        // reserve room for the substring that will be appended
        $length -= (int) \mb_strlen($substring);
        if ($length <= 0) {
            return $substring;
        }

        $cut = \mb_substr($str, 0, $length);
        if ($cut === false) {
            return '';
        }

        // a space directly after the cut means that no word was split
        $nextSpace = \mb_strpos($str, ' ', $length - 1);
        if ($nextSpace !== $length) {
            // go back to the last space within the truncated part
            $lastSpace = \mb_strrpos($cut, ' ', 0);

            $dropSplitWord = $lastSpace !== false
                             ||
                             ($nextSpace !== false && $ignoreDoNotSplitWordsForOneWord === false);
            if ($dropSplitWord) {
                $cut = (string) \mb_substr($cut, 0, (int) $lastSpace);
            }
        }

        return $cut . $substring;
    }
8152
8153
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * INFO: shortcut for "UTF8::str_delimit()" with "_" as delimiter.
     *
     * @param string $str
     *
     * @return string the underscored string
     */
    public static function str_underscored(string $str): string
    {
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
8167
8168
    /**
     * Returns an UpperCamelCase version of the supplied string. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * INFO: the string is camelized via "UTF8::str_camelize()" first, then the
     * first character is upper-cased via "UTF8::ucfirst()".
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string in UpperCamelCase
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst($camelized, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
    }
8190
8191
    /**
     * Alias of "UTF8::ucfirst()": all arguments are passed through unchanged.
     *
     * @see UTF8::ucfirst()
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     */
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $upperFirst = self::ucfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $upperFirst;
    }
8213
8214
    /**
     * Counts number of words in the UTF-8 string.
     *
     * INFO: based on "UTF8::str_to_words()", where the words are stored
     * under the odd keys of the result.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $format   [optional] <p>
     *                         <strong>0</strong> => return a number of words (default)<br>
     *                         <strong>1</strong> => return an array of words<br>
     *                         <strong>2</strong> => return an array of words with word-offset as key
     *                         </p>
     * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
     *
     * @return int|string[] The number of words in the string
     */
    public static function str_word_count(string $str, int $format = 0, string $charlist = '')
    {
        $parts = self::str_to_words($str, $charlist);
        $partCount = \count($parts);

        // every other format value behaves like "0" => only count the words
        if ($format !== 1 && $format !== 2) {
            return (int) (($partCount - 1) / 2);
        }

        $words = [];

        if ($format === 1) {
            for ($idx = 1; $idx < $partCount; $idx += 2) {
                $words[] = $parts[$idx];
            }

            return $words;
        }

        // $format === 2: use the offset of each word as key
        $offset = (int) self::strlen($parts[0]);
        for ($idx = 1; $idx < $partCount; $idx += 2) {
            $words[$offset] = $parts[$idx];
            // skip the word itself and the text that follows it
            $offset += (int) self::strlen($parts[$idx]) + (int) self::strlen($parts[$idx + 1]);
        }

        return $words;
    }
8251
8252
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp(): both strings are
     * passed through "UTF8::strtocasefold()" before they are compared.
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
8273
8274
    /**
     * Alias of "UTF8::strstr()": all arguments are passed through unchanged.
     *
     * @see UTF8::strstr()
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $cleanUtf8
     *
     * @return false|string
     */
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

        return $result;
    }
8296
8297
    /**
     * Case-sensitive string comparison.
     *
     * INFO: both strings are normalized (NFD) before the byte-wise comparison,
     * so canonically equivalent strings compare as equal. If a string cannot be
     * normalized (e.g. invalid UTF-8), it is compared as it is.
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        // identical strings don't need to be normalized
        if ($str1 === $str2) {
            return 0;
        }

        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // "Normalizer::normalize()" does not return a string on failure,
        // fall back to the raw input instead of comparing a non-string
        return \strcmp(
            \is_string($normalized1) ? $normalized1 : $str1,
            \is_string($normalized2) ? $normalized2 : $str2
        );
    }
8315
8316
    /**
     * Find length of initial segment not matching mask.
     *
     * @param string $str
     * @param string $charList <p>The chars to look for.</p>
     * @param int    $offset   [optional] <p>Start of the part of $str that is examined.</p>
     * @param int    $length   [optional] <p>Length of the part of $str that is examined.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     */
    public static function strcspn(
        string $str,
        string $charList,
        int $offset = null,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // without a mask nothing can match => the whole string is the segment
        if ($charList === '') {
            return (int) self::strlen($str, $encoding);
        }

        // reduce the string to the requested part
        if ($offset !== null || $length !== null) {
            $start = (int) $offset;

            if ($encoding !== 'UTF-8') {
                /** @noinspection UnnecessaryCastingInspection */
                $segment = self::substr($str, $start, $length, $encoding);
            } elseif ($length !== null) {
                /** @noinspection UnnecessaryCastingInspection */
                $segment = \mb_substr($str, $start, $length);
            } else {
                /** @noinspection UnnecessaryCastingInspection */
                $segment = \mb_substr($str, $start);
            }

            if ($segment === false) {
                return 0;
            }

            $str = $segment;
        }

        if ($str === '') {
            return 0;
        }

        // capture everything in front of the first char from the mask
        $found = [];
        if (!\preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $found)) {
            // no char from the mask was found
            return (int) self::strlen($str, $encoding);
        }

        $segmentLength = self::strlen($found[1], $encoding);

        return $segmentLength === false ? 0 : $segmentLength;
    }
8379
8380
    /**
     * Alias of "UTF8::stristr()": all arguments are passed through unchanged.
     *
     * @see UTF8::stristr()
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $cleanUtf8
     *
     * @return false|string
     */
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

        return $result;
    }
8402
8403
    /**
     * Create a UTF-8 string from code points.
     *
     * INFO: opposite to UTF8::codepoints(), every value is converted via "UTF8::chr()"
     *
     * @param array $array <p>Integer or Hexadecimal codepoints.</p>
     *
     * @return string UTF-8 encoded string
     */
    public static function string(array $array): string
    {
        $toChar = [self::class, 'chr'];
        $chars = \array_map($toChar, $array);

        return \implode('', $chars);
    }
8425
8426
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * INFO: the known BOMs are the keys of "self::$BOM".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     */
    public static function string_has_bom(string $str): bool
    {
        // only the keys are needed, and they are read by value: iterating the
        // static property by reference would turn its elements into references
        foreach (\array_keys(self::$BOM) as $bomString) {
            if (\strpos($str, (string) $bomString) === 0) {
                return true;
            }
        }

        return false;
    }
8446
8447
    /**
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string $str            <p>
     *                               The input string.
     *                               </p>
     * @param string $allowable_tags [optional] <p>
     *                               You can use the optional second parameter to specify tags which should
     *                               not be stripped.
     *                               </p>
     *                               <p>
     *                               HTML comments and PHP tags are also stripped. This is hardcoded and
     *                               can not be changed with allowable_tags.
     *                               </p>
     * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string the stripped string
     */
    public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        $input = $cleanUtf8 === true ? self::clean($str) : $str;

        // don't pass "null" as second argument to the native function
        return $allowable_tags === null
            ? \strip_tags($input)
            : \strip_tags($input, $allowable_tags);
    }
8483
8484
    /**
     * Strip all whitespace characters. This includes tabs and newline
     * characters, as well as multibyte whitespace such as the thin space
     * and ideographic space.
     *
     * @param string $str
     *
     * @return string
     */
    public static function strip_whitespace(string $str): string
    {
        return $str === ''
            ? ''
            : (string) \preg_replace('/[[:space:]]+/u', '', $str);
    }
8501
8502
    /**
     * Finds position of first occurrence of a string within another, case insensitive.
     *
     * INFO: "mbstring" is used if it is available, otherwise the function falls back
     * to "intl" (UTF-8 only), the native "stripos()" (ASCII only) and finally
     * to a case-folded "UTF8::strpos()".
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle    <p>The string to find in haystack.</p>
     * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        $hasMbstring = self::$SUPPORT['mbstring'] === true;

        // fast path: no need to normalize the encoding
        if ($hasMbstring && $encoding === 'UTF-8') {
            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($hasMbstring) {
            return \mb_stripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback via intl
        //

        $canUseIntl = $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
                      &&
                      $offset >= 0 // grapheme_stripos() can't handle negative offset
                      &&
                      self::$SUPPORT['intl'] === true;
        if ($canUseIntl) {
            $position = \grapheme_stripos($haystack, $needle, $offset);
            if ($position !== false) {
                return $position;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $foldedNeedle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($foldedHaystack, $foldedNeedle, $offset, $encoding);
    }
8577
8578
    /**
     * Returns all of haystack starting from and including the first occurrence of needle to the end.
     *
     * INFO: the search is case-insensitive. "mbstring" is used if it is available,
     * otherwise the function falls back to "intl" (UTF-8 only), the native
     * "stristr()" (ASCII only) and finally to a regex.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string a sub-string,<br>or <strong>false</strong> if needle is not found
     */
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // the needle can only be empty here if the cleaning removed everything;
        // a loose check ("!$needle") would also catch the valid needle "0"
        if ($needle === '') {
            return $haystack;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // capture everything in front of the first (case-insensitive) match
        $match = [];
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // skip the part in front of the needle and return the rest
        return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
    }
8661
8662
    /**
     * Get the string length, not the byte-length!
     *
     * INFO: "mbstring" is used if it is available, otherwise the function falls back
     * to "iconv", "intl" (UTF-8 only), the native "strlen()" (ASCII only)
     * and finally to a regex.
     *
     * @see     http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str       <p>The string being checked for length.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
     *                   $encoding.
     *                   (One multi-byte character counted as +1).
     *                   <br>
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
     *                   chars.
     */
    public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($str === '') {
            return 0;
        }

        // "UTF-8" and "CP850" are used as they are, everything else is normalized first
        $isPlainEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isPlainEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strlen($str)
                : \mb_strlen($str, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strlen($str);
        }

        $noEncodingSupport = $encoding !== 'UTF-8'
                             &&
                             self::$SUPPORT['mbstring'] === false
                             &&
                             self::$SUPPORT['iconv'] === false;
        if ($noEncodingSupport) {
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconvLength = \iconv_strlen($str, $encoding);
            if ($iconvLength !== false) {
                return $iconvLength;
            }
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strlen()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $graphemeLength = \grapheme_strlen($str);
            if ($graphemeLength !== null) {
                return $graphemeLength;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php
        //

        // count the chars via regex, no match at all is reported as "false"
        \preg_match_all('/./us', $str, $chars);
        $charCount = \count($chars[0]);

        return $charCount === 0 ? false : $charCount;
    }
8776
8777
    /**
     * Count the bytes (not the characters) of a string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return int <p>The number of bytes, 0 for an empty string.</p>
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // When the "mbstring_func_overload" support flag is set, the "mb_"
        // functions are available, so count via an 8-bit charset instead.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850') // 8-BIT
            : \strlen($str);
    }
8797
8798
    /**
     * Compare two strings case-insensitively using a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcasecmp()
     *
     * Both inputs are case-folded via UTF8::strtocasefold() and the results are
     * handed to UTF8::strnatcmp().
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
8819
8820
    /**
     * Compare two strings using a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcmp()
     *
     * Identical strings short-circuit to 0; otherwise both strings are passed
     * through UTF8::strtonatfold() before the native "strnatcmp()" is applied.
     *
     * @see  http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        $left = (string) self::strtonatfold($str1);
        $right = (string) self::strtonatfold($str2);

        return \strnatcmp($left, $right);
    }
8839
8840
    /**
     * Compare the first n characters of two strings, ignoring case.
     *
     * Both inputs are case-folded via UTF8::strtocasefold() and then compared
     * with UTF8::strncmp().
     *
     * @see  http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strncmp($folded1, $folded2, $len);
    }
8867
8868
    /**
     * Compare the first n characters of two strings.
     *
     * Both strings are cut to their first $len characters and the two prefixes
     * are compared with UTF8::strcmp().
     *
     * @see  http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // only unknown charset names need to be normalized
        $isKnownCharset = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownCharset) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($encoding !== 'UTF-8') {
            $prefix1 = (string) self::substr($str1, 0, $len, $encoding);
            $prefix2 = (string) self::substr($str2, 0, $len, $encoding);

            return self::strcmp($prefix1, $prefix2);
        }

        // UTF-8: cut the prefixes directly with "mb_substr()"
        $prefix1 = (string) \mb_substr($str1, 0, $len);
        $prefix2 = (string) \mb_substr($str2, 0, $len);

        return self::strcmp($prefix1, $prefix2);
    }
8903
8904
    /**
     * Search a string for any character out of a given set.
     *
     * @see  http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>This parameter is case sensitive.</p>
     *
     * @return false|string the rest of the haystack, starting at the first character found,
     *                      or false if no character matched or one of the inputs is empty
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        // build a unicode-aware regex from the character list
        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $hit)) {
            return false;
        }

        // locate the matched character (byte offset) and return the tail from there
        $bytePos = (int) \strpos($haystack, $hit[0]);

        return \substr($haystack, $bytePos);
    }
8926
8927
    /**
     * Find position of first occurrence of string in a string.
     *
     * The lookup tries, in this order: mbstring, a plain byte search for
     * "CP850" / "ASCII", intl (grapheme), iconv, a plain byte search for
     * ASCII-only input and finally a pure PHP implementation.
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.
     *                              A negative offset counts from the end of the haystack.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string.<br> If needle is not found (or haystack / needle is empty) it returns false.
     */
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strpos($haystack, $needle, $offset);
            }

            return \mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //
        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        if ($offset < 0) {
            // Translate "count from the end" into the equivalent absolute
            // character offset, so the position returned below stays relative
            // to the start of the original haystack (like "mb_strpos()").
            $offset += (int) self::strlen($haystack, $encoding);
            if ($offset < 0) {
                $offset = 0;
            }
        }

        // search only in the part of the haystack that starts at $offset
        $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystackTmp === false) {
            $haystackTmp = '';
        }
        $haystack = (string) $haystackTmp;

        // byte position inside the shortened haystack
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        if ($pos) {
            // convert the byte position into a character position
            return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        return $offset;
    }
9074
9075
    /**
     * Find the byte position of the first occurrence of a string in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @return false|int The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found, or if haystack or
     *                   needle is empty, it returns false.
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // When the "mbstring_func_overload" support flag is set, the "mb_"
        // functions are available, so search via an 8-bit charset instead.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strpos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strpos($haystack, $needle, $offset);
    }
9104
9105
    /**
     * Finds the last occurrence of a character in a string within another.
     *
     * INFO: the fallbacks without mbstring only use the first character of the needle.
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or false if needle is not found
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        $isSingleByteCharset = \in_array($encoding, ['CP850', 'ASCII'], true);
        if ($before_needle === false && $isSingleByteCharset) {
            return \strrchr($haystack, $needle);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv || vanilla php
        //

        // both fallbacks only search for the first character of the needle
        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        // position of the last occurrence: iconv if available, otherwise our own implementation
        if (self::$SUPPORT['iconv'] === true) {
            $lastPos = \iconv_strrpos($haystack, $needle, $encoding);
        } else {
            $lastPos = self::strrpos($haystack, $needle, 0, $encoding);
        }

        if ($lastPos === false) {
            return false;
        }

        if ($before_needle) {
            return self::substr($haystack, 0, $lastPos, $encoding);
        }

        return self::substr($haystack, $lastPos, null, $encoding);
    }
9227
9228
    /**
     * Reverses characters order in the string.
     *
     * The string is passed through UTF8::emoji_encode() before and through
     * UTF8::emoji_decode() after the reversal. For UTF-8 the string is walked
     * per grapheme when intl is available, otherwise per "mb_" character.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the string with characters in the reverse sequence
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::emoji_encode($str, true);
        $result = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $length = (int) self::strlen($str, $encoding);
            for ($idx = $length - 1; $idx >= 0; --$idx) {
                $char = self::substr($str, $idx, 1, $encoding);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            $length = (int) \grapheme_strlen($str);
            for ($idx = $length - 1; $idx >= 0; --$idx) {
                $char = \grapheme_substr($str, $idx, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } else {
            $length = (int) \mb_strlen($str);
            for ($idx = $length - 1; $idx >= 0; --$idx) {
                $char = \mb_substr($str, $idx, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        }

        return self::emoji_decode($result, true);
    }
9280
9281
    /**
     * Finds the last occurrence of a character in a string within another, case insensitive.
     *
     * INFO: the fallback without mbstring only uses the first character of the needle.
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or<br>false if needle is not found
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // only the first character of the needle is searched for
        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        $lastPos = self::strripos($haystack, $needle, 0, $encoding);
        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
9356
9357
    /**
     * Find position of last occurrence of a case-insensitive string.
     *
     * The lookup tries, in this order: mbstring, a plain byte search for
     * "CP850" / "ASCII", intl (grapheme), a plain byte search for ASCII-only
     * input and finally case-folding both strings and calling UTF8::strrpos().
     *
     * @param string     $haystack  <p>The string to look in.</p>
     * @param int|string $needle    <p>The string to look for.<br>A non-negative int is converted via UTF8::chr().</p>
     * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        $needleIsInt = (int) $needle === $needle;
        if ($needleIsInt && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strripos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        $intlUsable = $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
            && $offset >= 0 // grapheme_strripos() can't handle negative offset
            && self::$SUPPORT['intl'] === true;
        if ($intlUsable) {
            $graphemePos = \grapheme_strripos($haystack, $needle, $offset);
            if ($graphemePos !== false) {
                return $graphemePos;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // case-fold both strings, then do a case-sensitive search
        $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding);
        $foldedNeedle = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($foldedHaystack, $foldedNeedle, $offset, $encoding, $cleanUtf8);
    }
9467
9468
    /**
     * Finds the byte position of the last occurrence of a string within another, case insensitive.
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @return false|int return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found or if haystack or
     *                   needle is empty
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // When the "mbstring_func_overload" support flag is set, the "mb_"
        // functions are available, so search via an 8-bit charset instead.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strripos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strripos($haystack, $needle, $offset);
    }
9499
9500
    /**
     * Find position of last occurrence of a string in a string.
     *
     * The lookup tries, in this order: mbstring, a plain byte search for
     * "CP850" / "ASCII", intl (grapheme), a plain byte search for ASCII-only
     * input and finally a pure PHP implementation.
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                              into the string. Negative values will stop searching at an arbitrary point prior to
     *                              the end of the string.
     *                              </p>
     * @param string     $encoding  [optional] <p>Set the charset.</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strrpos($haystack, $needle, $offset);
            }

            return \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strrpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $offset >= 0 // grapheme_strrpos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Shorten the haystack according to the offset. The charset is passed
        // along, so that character offsets are not counted as UTF-8 for other encodings.
        $haystackTmp = null;
        if ($offset > 0) {
            // skip the first $offset characters
            $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        } elseif ($offset < 0) {
            // drop the last |$offset| characters; found positions then need no shift
            $haystackTmp = self::substr($haystack, 0, $offset, $encoding);
            $offset = 0;
        }

        if ($haystackTmp !== null) {
            if ($haystackTmp === false) {
                $haystackTmp = '';
            }
            $haystack = (string) $haystackTmp;
        }

        // byte position inside the (maybe shortened) haystack
        $pos = \strrpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        $strTmp = \substr($haystack, 0, $pos);
        if ($strTmp === false) {
            return false;
        }

        // convert the byte position into a character position
        return $offset + (int) self::strlen($strTmp, $encoding);
    }
9637
9638
    /**
     * Byte-wise lookup of the last occurrence of a string inside another string.
     *
     * @param string $haystack <p>The string that is searched.</p>
     * @param string $needle   <p>The string that is searched for.</p>
     * @param int    $offset   [optional] <p>
     *                         Passed through unchanged to the underlying "strrpos()" / "mb_strrpos()" call.
     *                         </p>
     *
     * @return false|int
     *                   The position of the last occurrence of the needle in the haystack,<br>
     *                   or <strong>false</strong> if the needle was not found or one of the inputs is empty
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // an empty haystack or an empty needle can never produce a match
        if ($needle === '' || $haystack === '') {
            return false;
        }

        $isOverloaded = self::$SUPPORT['mbstring_func_overload'] === true;
        if (!$isOverloaded) {
            return \strrpos($haystack, $needle, $offset);
        }

        // with function overloading the "mb_" functions are available,
        // so request an 8-bit charset explicitly to keep byte semantics
        return \mb_strrpos($haystack, $needle, $offset, 'CP850');
    }
9668
9669
    /**
     * Get the length of the leading part of a string that consists only of characters from the given mask.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $mask     <p>The allowed characters.</p>
     * @param int    $offset   [optional] <p>Start of the sub-string that is inspected.</p>
     * @param int    $length   [optional] <p>Length of the sub-string that is inspected.</p>
     * @param string $encoding [optional] <p>Set the charset.</p>
     *
     * @return false|int
     *                   The length of the matching initial segment,<br>
     *                   <strong>0</strong> if nothing matches or if the (sub-)string or the mask is empty
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // reduce the input to the requested window, if there is one
        if ($offset !== 0 || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $str = (string) \mb_substr($str, $offset, $length);
            } else {
                $str = (string) \mb_substr($str, $offset);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        // match the longest prefix that is built only from mask characters
        $pattern = '/^' . self::rxClass($mask) . '+/u';
        $found = [];
        if (!\preg_match($pattern, $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
9712
9713
    /**
     * Get the part of the haystack that starts at the first occurrence of the needle.
     *
     * The lookup is delegated in this order: mbstring, plain "strstr()" for the
     * "CP850" / "ASCII" encodings, intl ("grapheme_strstr()"), plain "strstr()" for
     * pure ASCII input and finally a regex based fallback.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, the part of the haystack before the first
     *                              occurrence of the needle is returned (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      A sub-string,<br>or <strong>false</strong> if needle is not found
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // invalid characters in front of the needle would lead to a wrong result,
            // so remove them from both inputs first
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $isUtf8 = $encoding === 'UTF-8';

        //
        // 1. mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $isUtf8
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // 2. binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strstr($haystack, $needle, $before_needle);
        }

        // every other non UTF-8 charset can't be handled without mbstring
        if (!$isUtf8 && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 3. intl ("grapheme_strstr()" can't handle other encodings)
        //

        if ($isUtf8 && self::$SUPPORT['intl'] === true) {
            $viaIntl = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($viaIntl !== false) {
                return $viaIntl;
            }
        }

        //
        // 4. ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // 5. vanilla php: capture everything in front of the first needle
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $hit);

        $prefix = $hit[1] ?? null;
        if ($prefix === null) {
            return false;
        }

        return $before_needle
            ? $prefix
            : self::substr($haystack, (int) self::strlen($prefix));
    }
9821
9822
    /**
     * Byte-wise lookup of the first occurrence of a string within another string.
     *
     * @param string $haystack      <p>The string that is searched.</p>
     * @param string $needle        <p>The string that is searched for.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Selects the returned portion of the haystack:<br>
     *                              <b>true</b>: everything before the first occurrence of the needle<br>
     *                              <b>false</b>: everything from the first occurrence of the needle to the end
     *                              </p>
     *
     * @return false|string
     *                      The portion of the haystack,<br>
     *                      or <strong>false</strong> if the needle is not found or one of the inputs is empty
     */
    public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
    {
        // an empty haystack or an empty needle can never produce a match
        if ($needle === '' || $haystack === '') {
            return false;
        }

        $isOverloaded = self::$SUPPORT['mbstring_func_overload'] === true;
        if (!$isOverloaded) {
            return \strstr($haystack, $needle, $before_needle);
        }

        // with function overloading the "mb_" functions are available,
        // so request an 8-bit charset explicitly to keep byte semantics
        return \mb_strstr($haystack, $needle, $before_needle, 'CP850');
    }
9857
9858
    /**
     * Unicode transformation for case-less matching.
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str       <p>The input string.</p>
     * @param bool        $full      [optional] <p>
     *                               <b>true</b>, replace full case folding chars (default)<br>
     *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                               </p>
     * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding  [optional] <p>Set the charset.</p>
     * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower     [optional] <p>
     *                               <b>true</b>, fold to lowercase (default)<br>
     *                               any other value, fold to uppercase (uppercase is for some languages better ...)
     *                               </p>
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $cleanUtf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // drop invalid UTF-8 sequences before the case mapping is applied
            $str = self::clean($str);
        }

        // replace the special case-folding characters first
        $str = self::fixStrCaseHelper($str, $lower, $full);

        $toLower = $lower === true;

        // no language and plain UTF-8: "mb_*" can be called directly
        if ($lang === null && $encoding === 'UTF-8') {
            return $toLower ? \mb_strtolower($str) : \mb_strtoupper($str);
        }

        // otherwise the encoding / language aware helpers do the work
        return $toLower
            ? self::strtolower($str, $encoding, $cleanUtf8, $lang)
            : self::strtoupper($str, $encoding, $cleanUtf8, $lang);
    }
9910
9911
    /**
     * Convert a string to lowercase.
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                   <p>The string being lowercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $str = (string) $str;
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // drop invalid UTF-8 sequences before the case mapping is applied
            $str = self::clean($str);
        }

        if ($tryToKeepStringLength === true) {
            // hack for old php version or for the polyfill ...
            $str = self::fixStrCaseHelper($str, true);
        }

        // fast path: no language and plain UTF-8
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang === null) {
            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        if (self::$SUPPORT['intl'] !== true) {
            // language specific rules need the transliterator from intl
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        // load the list of known transliterator ids only once
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $transliteratorId = $lang . '-Lower';
        if (!\in_array($transliteratorId, self::$INTL_TRANSLITERATOR_LIST, true)) {
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, \E_USER_WARNING);

            // unknown language: use the language independent rules
            $transliteratorId = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        return (string) \transliterator_transliterate($transliteratorId, $str);
    }
9980
9981
    /**
     * Make a string uppercase.
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                   <p>The string being uppercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // drop invalid UTF-8 sequences before the case mapping is applied
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, false);
        }

        // fast path: no language and plain UTF-8
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // load the list of known transliterator ids only once
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $langCode = $lang . '-Upper';
                if (!\in_array($langCode, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl is available here, only the requested language is unknown
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, \E_USER_WARNING);

                    // unknown language: use the language independent rules
                    $langCode = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($langCode, $str);
            }

            // language specific rules need the transliterator from intl
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
10050
10051
    /**
     * Translate characters or replace sub-strings.
     *
     * - If "$to" is not an empty string, "$from" and "$to" are split via "UTF8::str_split()",
     *   trimmed to the same number of elements and applied as a map (from[i] => to[i]).
     * - If "$to" is an empty string and "$from" is a string, every occurrence of "$from" is removed.
     * - If "$to" is an empty string and "$from" is an array, it is passed as replace-pairs to "\strtr()".
     *
     * @see  http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from.</p>
     * @param string|string[] $to   [optional] <p>The string being translated to.</p>
     *
     * @throws \InvalidArgumentException if the "from" and "to" characters can't be combined into a map
     *
     * @return string
     *                This function returns a copy of str, translating all occurrences of each character in from to the
     *                corresponding character in to
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // replacing something by itself is a no-op
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            $fromChars = self::str_split($from);
            $toChars = self::str_split($to);
            $countFrom = \count($fromChars);
            $countTo = \count($toChars);

            // the longer list is cut down to the size of the shorter one
            if ($countFrom > $countTo) {
                $fromChars = \array_slice($fromChars, 0, $countTo);
            } elseif ($countFrom < $countTo) {
                $toChars = \array_slice($toChars, 0, $countFrom);
            }

            // keep the result in its own variable, so that the error message
            // below can still show the characters that could not be combined
            $pairs = \array_combine($fromChars, $toChars);
            if ($pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($fromChars, true) . ' | to: ' . \print_r($toChars, true) . ')');
            }

            $from = $pairs;
        }

        // only reachable with an empty "$to": remove "$from" from the string
        if (\is_string($from)) {
            return \str_replace($from, '', $str);
        }

        return \strtr($str, $from);
    }
10098
10099
    /**
     * Get the display width of a string.
     *
     * Without mbstring every character from the "wide" ranges listed below is
     * counted as two columns and every other character as one column.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return int
     */
    public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
    {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            $str = self::clean($str);
        }

        $isUtf8 = $encoding === 'UTF-8';

        //
        // 1. mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $isUtf8 ? \mb_strwidth($str) : \mb_strwidth($str, $encoding);
        }

        //
        // 2. vanilla php (works on UTF-8 only, so convert first if needed)
        //

        if (!$isUtf8) {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // remove all wide characters and remember how many of them were found
        $widePattern = '/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u';
        $wideCount = 0;
        $narrowPart = (string) \preg_replace($widePattern, '', $str, -1, $wideCount);

        // wide characters use two columns, the remaining characters one column each
        return (2 * $wideCount) + (int) self::strlen($narrowPart, 'UTF-8');
    }
10149
10150
    /**
10151
     * Get part of a string.
10152
     *
10153
     * @see http://php.net/manual/en/function.mb-substr.php
10154
     *
10155
     * @param string $str       <p>The string being checked.</p>
10156
     * @param int    $offset    <p>The first position used in str.</p>
10157
     * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
10158
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
10159
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10160
     *
10161
     * @return false|string
10162
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
10163
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
10164
     *                      characters long, <b>FALSE</b> will be returned.
10165
     */
10166 172
    public static function substr(
10167
        string $str,
10168
        int $offset = 0,
10169
        int $length = null,
10170
        string $encoding = 'UTF-8',
10171
        bool $cleanUtf8 = false
10172
    ) {
10173
        // empty string
10174 172
        if ($str === '' || $length === 0) {
10175 8
            return '';
10176
        }
10177
10178 168
        if ($cleanUtf8 === true) {
10179
            // iconv and mbstring are not tolerant to invalid encoding
10180
            // further, their behaviour is inconsistent with that of PHP's substr
10181 2
            $str = self::clean($str);
10182
        }
10183
10184
        // whole string
10185 168
        if (!$offset && $length === null) {
10186 7
            return $str;
10187
        }
10188
10189 163
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
10190 19
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
10191
        }
10192
10193
        //
10194
        // fallback via mbstring
10195
        //
10196
10197 163
        if (self::$SUPPORT['mbstring'] === true) {
10198 161
            if ($encoding === 'UTF-8') {
10199 161
                if ($length === null) {
10200 64
                    return \mb_substr($str, $offset);
10201
                }
10202
10203 102
                return \mb_substr($str, $offset, $length);
10204
            }
10205
10206
            return self::substr($str, $offset, $length, $encoding);
10207
        }
10208
10209
        //
10210
        // fallback for binary || ascii only
10211
        //
10212
10213
        if (
10214 4
            $encoding === 'CP850'
10215
            ||
10216 4
            $encoding === 'ASCII'
10217
        ) {
10218
            if ($length === null) {
10219
                return \substr($str, $offset);
10220
            }
10221
10222
            return \substr($str, $offset, $length);
10223
        }
10224
10225
        // otherwise we need the string-length
10226 4
        $str_length = 0;
10227 4
        if ($offset || $length === null) {
10228 4
            $str_length = self::strlen($str, $encoding);
10229
        }
10230
10231
        // e.g.: invalid chars + mbstring not installed
10232 4
        if ($str_length === false) {
10233
            return false;
10234
        }
10235
10236
        // empty string
10237 4
        if ($offset === $str_length && !$length) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $length of type integer|null is loosely compared to false; this is ambiguous if the integer can be 0. You might want to explicitly use === null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
10238
            return '';
10239
        }
10240
10241
        // impossible
10242 4
        if ($offset && $offset > $str_length) {
10243
            return '';
10244
        }
10245
10246 4
        if ($length === null) {
10247 4
            $length = (int) $str_length;
10248
        } else {
10249 2
            $length = (int) $length;
10250
        }
10251
10252
        if (
10253 4
            $encoding !== 'UTF-8'
10254
            &&
10255 4
            self::$SUPPORT['mbstring'] === false
10256
        ) {
10257 2
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
10258
        }
10259
10260
        //
10261
        // fallback via intl
10262
        //
10263
10264
        if (
10265 4
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
10266
            &&
10267 4
            $offset >= 0 // grapheme_substr() can't handle negative offset
10268
            &&
10269 4
            self::$SUPPORT['intl'] === true
10270
        ) {
10271
            $returnTmp = \grapheme_substr($str, $offset, $length);
10272
            if ($returnTmp !== false) {
10273
                return $returnTmp;
10274
            }
10275
        }
10276
10277
        //
10278
        // fallback via iconv
10279
        //
10280
10281
        if (
10282 4
            $length >= 0 // "iconv_substr()" can't handle negative length
10283
            &&
10284 4
            self::$SUPPORT['iconv'] === true
10285
        ) {
10286
            $returnTmp = \iconv_substr($str, $offset, $length);
10287
            if ($returnTmp !== false) {
10288
                return $returnTmp;
10289
            }
10290
        }
10291
10292
        //
10293
        // fallback for ascii only
10294
        //
10295
10296 4
        if (self::is_ascii($str)) {
10297
            return \substr($str, $offset, $length);
10298
        }
10299
10300
        //
10301
        // fallback via vanilla php
10302
        //
10303
10304
        // split to array, and remove invalid characters
10305 4
        $array = self::str_split($str);
10306
10307
        // extract relevant part, and join to make sting again
10308 4
        return \implode('', \array_slice($array, $offset, $length));
10309
    }
10310
10311
    /**
     * Compare a slice of $str1 (taken from $offset, up to $length characters) with $str2.
     *
     * When an offset or a length is given, $str1 is reduced to that slice and $str2 is
     * shortened to the same number of characters before both are compared.
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. If null, the slice of $str1
     *                                     runs to the end of the string.</p>
     * @param bool     $case_insensitivity [optional] <p>If TRUE, the comparison is case insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        // slicing is only needed if the caller restricted the window
        $mustSlice = ($offset !== 0 || $length !== null);

        if ($mustSlice && $encoding === 'UTF-8') {
            $str1 = $length === null
                ? (string) \mb_substr($str1, $offset)
                : (string) \mb_substr($str1, $offset, $length);

            // shorten the second string to the size of the extracted slice
            $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
        } elseif ($mustSlice) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str1 = (string) self::substr($str1, $offset, $length, $encoding);
            $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1), $encoding);
        }

        return $case_insensitivity === true
            ? self::strcasecmp($str1, $str2, $encoding)
            : self::strcmp($str1, $str2);
    }
10364
10365
    /**
     * Count the number of substring occurrences.
     *
     * @see  http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack  <p>The string to search in.</p>
     * @param string   $needle    <p>The substring to search for.</p>
     * @param int      $offset    [optional] <p>The offset where to start counting.</p>
     * @param int|null $length    [optional] <p>
     *                            The maximum length after the specified offset to search for the
     *                            substring. A negative value is handed to "mb_substr()" as-is, i.e. it
     *                            stops that many characters before the end of the haystack.
     *                            A length of 0 always yields 0.
     *                            </p>
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The number of occurrences, or <strong>false</strong> if $haystack or $needle is
     *                   empty or if the length of $haystack could not be determined
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // "$length === 0" was already handled above, so every non-null length
        // (positive or negative) restricts the search window; previously a
        // negative length was ignored whenever $offset was 0
        if ($offset !== 0 || $length !== null) {
            if ($length === null) {
                $lengthTmp = self::strlen($haystack, $encoding);
                if ($lengthTmp === false) {
                    return false;
                }
                $length = (int) $lengthTmp;
            }

            if ($encoding === 'UTF-8') {
                $haystack = (string) \mb_substr($haystack, $offset, $length);
            } else {
                $haystack = (string) \mb_substr($haystack, $offset, $length, $encoding);
            }
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_substr_count($haystack, $needle);
            }

            return \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback via regex: count the literal (quoted) needle in the haystack
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
10446
10447
    /**
     * Count the number of substring occurrences, working on bytes instead of characters.
     *
     * Without "mbstring.func_overload" the native "substr_count()" is used directly. With the
     * overload active, the haystack is cut with "substr()" first and the occurrences are then
     * counted via "mb_substr_count()" in an 8-bit encoding.
     *
     * @param string   $haystack <p>The string being checked.</p>
     * @param string   $needle   <p>The string being found.</p>
     * @param int      $offset   [optional] <p>The offset where to start counting.</p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring.
     *                           </p>
     *
     * @return false|int
     *                   The number of times the needle occurs in the haystack (0 if one of both
     *                   strings is empty); <strong>false</strong> if the haystack length could not be
     *                   determined or for an offset/length combination that is rejected before PHP 7.1
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        $isOverloaded = (self::$SUPPORT['mbstring_func_overload'] === true);

        // no overload: the native byte function can be used as it is
        if (!$isOverloaded) {
            return $length === null
                ? \substr_count($haystack, $needle, $offset)
                : \substr_count($haystack, $needle, $offset, $length);
        }

        if ($offset || $length !== null) {
            if ($length === null) {
                $lengthTmp = self::strlen($haystack);
                if ($lengthTmp === false) {
                    return false;
                }
                $length = (int) $lengthTmp;
            }

            // output from "substr_count()" have changed in PHP 7.1
            $isRejectedByOldPhp = $length !== 0
                                  && $offset !== 0
                                  && ($length + $offset) <= 0
                                  && Bootup::is_php('7.1') === false;
            if ($isRejectedByOldPhp) {
                return false;
            }

            $slice = \substr($haystack, $offset, $length);
            $haystack = $slice === false ? '' : (string) $slice;
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
    }
10524
10525
    /**
     * Count how often $substring occurs in $str.
     *
     * The search is case-sensitive by default. For a case-insensitive search both strings are
     * converted first: via "mb_strtoupper()" for UTF-8, via "UTF8::strtocasefold()" for every
     * other encoding.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $substring     <p>The substring to search for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int the number of occurrences, 0 if one of the strings is empty
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $caseSensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($str === '' || $substring === '') {
            return 0;
        }

        // decided on the raw value: only the literal "UTF-8" takes the short path
        $isUtf8 = ($encoding === 'UTF-8');

        if ($isUtf8) {
            if (!$caseSensitive) {
                $str = \mb_strtoupper($str);
                $substring = \mb_strtoupper($substring);
            }

            return (int) \mb_substr_count($str, $substring);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (!$caseSensitive) {
            $str = self::strtocasefold($str, true, false, $encoding, null, false);
            $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
        }

        return (int) \mb_substr_count($str, $substring, $encoding);
    }
10571
10572
    /**
     * Strip the prefix $needle from the beginning of $haystack, ignoring case.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @return string
     *                $haystack without the prefix, or the unchanged $haystack if it does not
     *                start with $needle (or if one of both strings is empty)
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // nothing to strip (an empty haystack is returned as the empty string it is)
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_istarts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $prefixLength = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefixLength);
    }
10596
10597
    /**
     * Get part of a string, processed in bytes (not in characters).
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first (byte) position used in str.</p>
     * @param int|null $length [optional] <p>The maximum length (in bytes) of the returned string;
     *                         null means "up to the end of the string".</p>
     *
     * @return false|string
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters; whatever the underlying "substr()" /
     *                      "mb_substr()" call returns is passed through unchanged
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        // leave the length out instead of passing a 32-bit sentinel (2147483647),
        // which would cut off results longer than 2 GiB on 64-bit systems
        if ($length === null) {
            return \substr($str, $offset);
        }

        return \substr($str, $offset, $length);
    }
10628
10629
    /**
     * Strip the suffix $needle from the end of $haystack, ignoring case.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     *
     * @return string
     *                $haystack without the suffix, or the unchanged $haystack if it does not
     *                end with $needle (or if one of both strings is empty)
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // nothing to strip (an empty haystack is returned as the empty string it is)
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_iends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // keep everything in front of the suffix
        $keepLength = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $keepLength);
    }
10653
10654
    /**
     * Strip the prefix $needle from the beginning of $haystack (case-sensitive).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @return string
     *                $haystack without the prefix, or the unchanged $haystack if it does not
     *                start with $needle (or if one of both strings is empty)
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // nothing to strip (an empty haystack is returned as the empty string it is)
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        $startsWithNeedle = (self::str_starts_with($haystack, $needle) === true);
        if (!$startsWithNeedle) {
            return $haystack;
        }

        return (string) \mb_substr($haystack, (int) self::strlen($needle));
    }
10678
10679
    /**
     * Replace text within a portion of a string.
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * If $str is an array, every element is processed on its own (with the matching element of
     * $replacement, $offset and $length, or with the scalar value if those are not arrays) and
     * an array is returned. The given $encoding is used for every element.
     *
     * NOTE: for array input a $length of null is turned into 0 for every element (pure
     * insertion), while for a single string null means "replace up to the end of the string".
     *
     * @param string|string[] $str         <p>The input string or an array of stings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of stings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given (single string only), the replacing
     *                                     ends at the end of string. If length is zero then this function will
     *                                     have the effect of inserting replacement into string at the given
     *                                     start offset.</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \InvalidArgumentException if $offset or $length is an array while $str is not
     *
     * @return string|string[] The result string is returned. If string is an array then array is returned.
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str) === true) {
            $num = \count($str);

            // the replacement
            if (\is_array($replacement) === true) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset (non-integer entries fall back to 0)
            if (\is_array($offset) === true) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$valueTmp) {
                    $valueTmp = (int) $valueTmp === $valueTmp ? $valueTmp : 0;
                }
                unset($valueTmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length (non-integer entries fall back to the number of strings)
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length) === true) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$valueTmpV2) {
                    $valueTmpV2 = (int) $valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
                }
                unset($valueTmpV2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call: a closure is used so that the requested encoding is
            // forwarded too (a plain callable would silently fall back to "UTF-8")
            return \array_map(
                static function ($strTmp, $replacementTmp, $offsetTmp, $lengthTmp) use ($encoding) {
                    return self::substr_replace($strTmp, $replacementTmp, $offsetTmp, $lengthTmp, $encoding);
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        // a single string only takes the first replacement
        if (\is_array($replacement) === true) {
            if (\count($replacement) > 0) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length) === true) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset) === true) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, string length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // translate a negative length into a positive one, cap too large values
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // head + replacement + tail
            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into characters and splice the replacement in
        \preg_match_all('/./us', $str, $smatches);
        \preg_match_all('/./us', $replacement, $rmatches);

        if ($length === null) {
            $lengthTmp = self::strlen($str, $encoding);
            if ($lengthTmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $lengthTmp;
        }

        \array_splice($smatches[0], $offset, $length, $rmatches[0]);

        return \implode('', $smatches[0]);
    }
10829
10830
    /**
     * Strip the suffix $needle from the end of $haystack (case-sensitive).
     *
     * Whether $haystack ends with $needle is decided by a byte-wise comparison; the suffix is
     * then cut off in characters of the given encoding.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                $haystack without the suffix, or the unchanged $haystack if it does not
     *                end with $needle (or if one of both strings is empty)
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to strip (an empty haystack is returned as the empty string it is)
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        $endsWithNeedle = (\substr($haystack, -\strlen($needle)) === $needle);
        if (!$endsWithNeedle) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            $keepLength = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keepLength);
        }

        $keepLength = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $keepLength, $encoding);
    }
10871
10872
    /**
     * Returns a case swapped version of the string.
     *
     * Pure ASCII input and every non "UTF-8" encoding are swapped with a byte-wise XOR of the
     * lower-cased, the upper-cased and the original string. UTF-8 input with multi-byte
     * characters is swapped character by character instead, because the XOR only works while
     * all three strings are byte-aligned (e.g. "ı" needs two bytes, its upper-case "I" only one).
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string each character's case swapped
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8') {
            // unchanged behaviour for other encodings (byte-wise XOR)
            return (string) (self::strtolower($str, $encoding) ^ self::strtoupper($str, $encoding) ^ $str);
        }

        $lower = \mb_strtolower($str);
        $upper = \mb_strtoupper($str);

        // ASCII only: one byte per character, so the XOR is safe
        if (\preg_match('/[^\x00-\x7F]/', $str) !== 1) {
            return (string) ($lower ^ $upper ^ $str);
        }

        $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);
        if ($chars === false) {
            // invalid UTF-8 cannot be split into characters: keep the byte-wise result
            return (string) ($lower ^ $upper ^ $str);
        }

        // re-use the whole-string conversions as long as they map 1:1 onto the
        // original characters, otherwise convert each character on its own
        $lowerChars = \preg_split('//u', $lower, -1, \PREG_SPLIT_NO_EMPTY);
        $upperChars = \preg_split('//u', $upper, -1, \PREG_SPLIT_NO_EMPTY);
        $charCount = \count($chars);
        $isAligned = $lowerChars !== false
                     && $upperChars !== false
                     && \count($lowerChars) === $charCount
                     && \count($upperChars) === $charCount;

        $swapped = '';
        foreach ($chars as $index => $char) {
            if ($isAligned) {
                $lowerChar = $lowerChars[$index];
                $upperChar = $upperChars[$index];
            } else {
                $lowerChar = \mb_strtolower($char);
                $upperChar = \mb_strtoupper($char);
            }

            // already lower-case (or caseless) => take the upper-case form, otherwise lower it
            $swapped .= ($char === $lowerChar) ? $upperChar : $lowerChar;
        }

        return $swapped;
    }
10899
10900
    /**
     * Checks whether symfony-polyfills are used.
     *
     * A polyfill is assumed when the "mbstring" or the "iconv" extension is not loaded,
     * but one of its functions ("mb_strlen" / "iconv") exists nevertheless.
     *
     * @return bool
     *              <strong>true</strong> if in use, <strong>false</strong> otherwise
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstringIsPolyfilled = \extension_loaded('mbstring') === false
                                && \function_exists('mb_strlen');

        $iconvIsPolyfilled = \extension_loaded('iconv') === false
                             && \function_exists('iconv');

        return $mbstringIsPolyfilled || $iconvIsPolyfilled;
    }
10923
10924
    /**
     * Replace every tab character in the string by $tabLength spaces.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength [optional] <p>Number of spaces that replace one tab. Default: 4</p>
     *
     * @return string the string with all tabs replaced
     */
    public static function tabs_to_spaces(string $str, int $tabLength = 4): string
    {
        $indent = \str_repeat(' ', $tabLength);

        return \str_replace("\t", $indent, $str);
    }
10942
10943
    /**
     * Convert the string to title case.
     *
     * Without a language and without $tryToKeepStringLength the conversion is done by
     * "mb_convert_case()" with "MB_CASE_TITLE"; otherwise it is delegated to
     * "UTF8::str_titleize()".
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string with all characters of $str being title-cased
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // language specific rules or length preservation requested
        $needsTitleize = ($lang !== null || $tryToKeepStringLength !== false);
        if ($needsTitleize) {
            return self::str_titleize($str, null, $encoding, false, $lang, $tryToKeepStringLength, false);
        }

        if ($encoding === 'UTF-8') {
            return \mb_convert_case($str, \MB_CASE_TITLE);
        }

        $normalizedEncoding = self::normalize_encoding($encoding, 'UTF-8');

        return \mb_convert_case($str, \MB_CASE_TITLE, $normalizedEncoding);
    }
10980
10981
    /**
     * Deprecated alias: forwards all arguments unchanged to "UTF8::to_ascii()".
     *
     * @see        UTF8::to_ascii()
     *
     * @param string $str
     * @param string $subst_chr
     * @param bool   $strict
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::to_ascii()"</p>
     */
    public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
10998
10999
    /**
     * Deprecated alias: forwards the argument unchanged to "UTF8::to_iso8859()".
     *
     * @see        UTF8::to_iso8859()
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *
     * @deprecated <p>use "UTF8::to_iso8859()"</p>
     */
    public static function toIso8859($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
11014
11015
    /**
     * Deprecated alias: forwards the argument unchanged to "UTF8::to_latin1()".
     *
     * @see        UTF8::to_latin1()
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *
     * @deprecated <p>use "UTF8::to_latin1()"</p>
     */
    public static function toLatin1($str)
    {
        $converted = self::to_latin1($str);

        return $converted;
    }
11030
11031
    /**
     * Deprecated camelCase alias that simply forwards to "UTF8::to_utf8()".
     *
     * The optional second argument of "UTF8::to_utf8()" is not exposed here, so its default is used.
     *
     * @see        UTF8::to_utf8()
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     *
     * @deprecated <p>use "UTF8::to_utf8()"</p>
     */
    public static function toUTF8($str)
    {
        $converted = self::to_utf8($str);

        return $converted;
    }
11046
11047
    /**
     * Convert a string into ASCII.
     *
     * Steps, in order:
     * <ul>
     * <li>Empty and already-ASCII input is returned unchanged.</li>
     * <li>The string is passed through "UTF8::clean()" and re-checked for ASCII.</li>
     * <li>With $strict and the intl extension available, "transliterator_transliterate()" is tried.</li>
     * <li>Every remaining non-ASCII character is decoded to its code point and looked up in a
     * per-"bank" table (code point >> 8) that is loaded lazily via "getDataIfExists()" and cached
     * in a static local; characters without an entry become $unknown.</li>
     * </ul>
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @return string
     */
    public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
    {
        // cache of the transliteration tables, keyed by bank (code point >> 8)
        static $UTF8_TO_ASCII;

        if ($str === '') {
            return '';
        }

        // check if we only have ASCII, first (better performance)
        if (self::is_ascii($str) === true) {
            return $str;
        }

        $str = self::clean(
            $str,
            true,
            true,
            true,
            false,
            true,
            true
        );

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
            return $str;
        }

        if (
            $strict === true
            &&
            self::$SUPPORT['intl'] === true
        ) {
            // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
            /** @noinspection PhpComposerExtensionStubsInspection */
            /** @noinspection UnnecessaryCastingInspection */
            $str = (string) \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

            // check again, if we only have ASCII, now ...
            if (self::is_ascii($str) === true) {
                return $str;
            }
        }

        // lookup table used below to turn a single byte into its ordinal value
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        // split the string into single UTF-8 characters
        \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
        $chars = $ar[0];

        foreach ($chars as &$c) {
            $ordC0 = self::$ORD[$c[0]];

            // ASCII - next please
            if ($ordC0 >= 0 && $ordC0 <= 127) {
                continue;
            }

            // 0x80-0xBF is a stray continuation byte and 0xFE/0xFF never occur in UTF-8:
            // there is no code point to look up, so do not fall through with a value
            // left over from a previous character.
            if ($ordC0 < 192 || $ordC0 >= 254) {
                $c = $unknown;

                continue;
            }

            // the lead byte tells how many continuation bytes follow and which bits it contributes
            if ($ordC0 <= 223) {
                $ord = $ordC0 - 192;
                $trailingBytes = 1;
            } elseif ($ordC0 <= 239) {
                $ord = $ordC0 - 224;
                $trailingBytes = 2;
            } elseif ($ordC0 <= 247) {
                $ord = $ordC0 - 240;
                $trailingBytes = 3;
            } elseif ($ordC0 <= 251) {
                $ord = $ordC0 - 248;
                $trailingBytes = 4;
            } else {
                $ord = $ordC0 - 252;
                $trailingBytes = 5;
            }

            // every continuation byte adds six more bits
            for ($i = 1; $i <= $trailingBytes; ++$i) {
                $ord = $ord * 64 + (self::$ORD[$c[$i]] - 128);
            }

            $bank = $ord >> 8;
            if (!isset($UTF8_TO_ASCII[$bank])) {
                $UTF8_TO_ASCII[$bank] = self::getDataIfExists(\sprintf('x%02x', $bank));
                if ($UTF8_TO_ASCII[$bank] === false) {
                    // remember the miss, so the lookup is not repeated
                    $UTF8_TO_ASCII[$bank] = [];
                }
            }

            $newchar = $ord & 255;

            $c = $UTF8_TO_ASCII[$bank][$newchar] ?? $unknown;
        }
        // break the reference, so a later write to $c cannot change the last element
        unset($c);

        return \implode('', $chars);
    }
11207
11208
    /**
     * Interpret a value as a boolean.
     *
     * The value is cast to string and then checked in this order:
     * <ul>
     * <li>An empty string is false.</li>
     * <li>"true", "1", "on", "yes" are true and "false", "0", "off", "no" are false
     * (exact match first, then the "strtolower()" form).</li>
     * <li>Any other numeric string is true only if it is greater than zero.</li>
     * <li>Everything else is the boolean cast of the "trim()"-ed string.</li>
     * </ul>
     *
     * @param mixed $str <p>The value to interpret.</p>
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        $value = (string) $str;

        if ($value === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $keywords = [
            'true'  => true,
            '1'     => true,
            'on'    => true,
            'yes'   => true,
            'false' => false,
            '0'     => false,
            'off'   => false,
            'no'    => false,
        ];

        // try the value as given first, then its lowercase form
        foreach ([$value, \strtolower($value)] as $candidate) {
            if (isset($keywords[$candidate])) {
                return $keywords[$candidate];
            }
        }

        if (\is_numeric($value)) {
            return ((float) $value + 0) > 0;
        }

        return (bool) \trim($value);
    }
11250
    /**
11251
     * Convert given string to safe filename (and keep string case).
11252
     *
11253
     * @param string $string
11254
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
11255
     *                                  simply replaced with hyphen.
11256
     * @param string $fallback_char
11257
     *
11258
     * @return string
11259
     */
11260 1
    public static function to_filename(string $string, bool $use_transliterate = false, string $fallback_char = '-'): string
11261
    {
11262 1
        if ($use_transliterate === true) {
11263 1
            $string = self::str_transliterate($string, $fallback_char);
11264
        }
11265
11266 1
        $fallback_char_escaped = \preg_quote($fallback_char, '/');
11267
11268 1
        $string = (string) \preg_replace(
11269
            [
11270 1
                '/[^' . $fallback_char_escaped . '\.\-a-zA-Z0-9\s]/', // 1) remove un-needed chars
11271 1
                '/[\s]+/u',                                           // 2) convert spaces to $fallback_char
11272 1
                '/[' . $fallback_char_escaped . ']+/u',               // 3) remove double $fallback_char's
11273
            ],
11274
            [
11275 1
                '',
11276 1
                $fallback_char,
11277 1
                $fallback_char,
11278
            ],
11279 1
            $string
11280
        );
11281
11282
        // trim "$fallback_char" from beginning and end of the string
11283 1
        return \trim($string, $fallback_char);
11284
    }
11285
11286
    /**
11287
     * Convert a string into "ISO-8859"-encoding (Latin-1).
11288
     *
11289
     * @param string|string[] $str
11290
     *
11291
     * @return string|string[]
11292
     */
11293 8
    public static function to_iso8859($str)
11294
    {
11295 8
        if (\is_array($str) === true) {
11296 2
            foreach ($str as $k => &$v) {
11297 2
                $v = self::to_iso8859($v);
11298
            }
11299
11300 2
            return $str;
11301
        }
11302
11303 8
        $str = (string) $str;
11304 8
        if ($str === '') {
11305 2
            return '';
11306
        }
11307
11308 8
        return self::utf8_decode($str);
11309
    }
11310
11311
    /**
11312
     * alias for "UTF8::to_iso8859()"
11313
     *
11314
     * @see UTF8::to_iso8859()
11315
     *
11316
     * @param string|string[] $str
11317
     *
11318
     * @return string|string[]
11319
     */
11320 2
    public static function to_latin1($str)
11321
    {
11322 2
        return self::to_iso8859($str);
11323
    }
11324
11325
    /**
11326
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
11327
     *
11328
     * <ul>
11329
     * <li>It decode UTF-8 codepoints and unicode escape sequences.</li>
11330
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
11331
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
11332
     * case.</li>
11333
     * </ul>
11334
     *
11335
     * @param string|string[] $str                    <p>Any string or array.</p>
11336
     * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
11337
     *
11338
     * @return string|string[] the UTF-8 encoded string
11339
     */
11340 41
    public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
11341
    {
11342 41
        if (\is_array($str) === true) {
11343 4
            foreach ($str as $k => &$v) {
11344 4
                $v = self::to_utf8($v, $decodeHtmlEntityToUtf8);
11345
            }
11346
11347 4
            return $str;
11348
        }
11349
11350 41
        $str = (string) $str;
11351 41
        if ($str === '') {
11352 6
            return $str;
11353
        }
11354
11355 41
        $max = \strlen($str);
11356 41
        $buf = '';
11357
11358 41
        for ($i = 0; $i < $max; ++$i) {
11359 41
            $c1 = $str[$i];
11360
11361 41
            if ($c1 >= "\xC0") { // should be converted to UTF8, if it's not UTF8 already
11362
11363 37
                if ($c1 <= "\xDF") { // looks like 2 bytes UTF8
11364
11365 34
                    $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
11366
11367 34
                    if ($c2 >= "\x80" && $c2 <= "\xBF") { // yeah, almost sure it's UTF8 already
11368 20
                        $buf .= $c1 . $c2;
11369 20
                        ++$i;
11370
                    } else { // not valid UTF8 - convert it
11371 34
                        $buf .= self::to_utf8_convert_helper($c1);
11372
                    }
11373 34
                } elseif ($c1 >= "\xE0" && $c1 <= "\xEF") { // looks like 3 bytes UTF8
11374
11375 33
                    $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
11376 33
                    $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];
11377
11378 33
                    if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF") { // yeah, almost sure it's UTF8 already
11379 15
                        $buf .= $c1 . $c2 . $c3;
11380 15
                        $i += 2;
11381
                    } else { // not valid UTF8 - convert it
11382 33
                        $buf .= self::to_utf8_convert_helper($c1);
11383
                    }
11384 26
                } elseif ($c1 >= "\xF0" && $c1 <= "\xF7") { // looks like 4 bytes UTF8
11385
11386 26
                    $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
11387 26
                    $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];
11388 26
                    $c4 = $i + 3 >= $max ? "\x00" : $str[$i + 3];
11389
11390 26
                    if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF" && $c4 >= "\x80" && $c4 <= "\xBF") { // yeah, almost sure it's UTF8 already
11391 8
                        $buf .= $c1 . $c2 . $c3 . $c4;
11392 8
                        $i += 3;
11393
                    } else { // not valid UTF8 - convert it
11394 26
                        $buf .= self::to_utf8_convert_helper($c1);
11395
                    }
11396
                } else { // doesn't look like UTF8, but should be converted
11397
11398 37
                    $buf .= self::to_utf8_convert_helper($c1);
11399
                }
11400 38
            } elseif (($c1 & "\xC0") === "\x80") { // needs conversion
11401
11402 4
                $buf .= self::to_utf8_convert_helper($c1);
11403
            } else { // it doesn't need conversion
11404
11405 38
                $buf .= $c1;
11406
            }
11407
        }
11408
11409
        // decode unicode escape sequences + unicode surrogate pairs
11410 41
        $buf = \preg_replace_callback(
11411 41
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
11412
            /**
11413
             * @param array $matches
11414
             *
11415
             * @return string
11416
             */
11417
            static function (array $matches): string {
11418 12
                if (isset($matches[3])) {
11419 12
                    $cp = (int) \hexdec($matches[3]);
11420
                } else {
11421
                    // http://unicode.org/faq/utf_bom.html#utf16-4
11422
                    $cp = ((int) \hexdec($matches[1]) << 10)
11423
                          + (int) \hexdec($matches[2])
11424
                          + 0x10000
11425
                          - (0xD800 << 10)
11426
                          - 0xDC00;
11427
                }
11428
11429
                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
11430
                //
11431
                // php_utf32_utf8(unsigned char *buf, unsigned k)
11432
11433 12
                if ($cp < 0x80) {
11434 8
                    return (string) self::chr($cp);
11435
                }
11436
11437 9
                if ($cp < 0xA0) {
11438
                    /** @noinspection UnnecessaryCastingInspection */
11439
                    return (string) self::chr(0xC0 | $cp >> 6) . (string) self::chr(0x80 | $cp & 0x3F);
11440
                }
11441
11442 9
                return self::decimal_to_chr($cp);
11443 41
            },
11444 41
            $buf
11445
        );
11446
11447 41
        if ($buf === null) {
11448
            return '';
11449
        }
11450
11451
        // decode UTF-8 codepoints
11452 41
        if ($decodeHtmlEntityToUtf8 === true) {
11453 2
            $buf = self::html_entity_decode($buf);
11454
        }
11455
11456 41
        return $buf;
11457
    }
11458
11459
    /**
11460
     * Strip whitespace or other characters from beginning or end of a UTF-8 string.
11461
     *
11462
     * INFO: This is slower then "trim()"
11463
     *
11464
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
11465
     * but the check for ACSII (7-Bit) cost more time, then we can safe here.
11466
     *
11467
     * @param string      $str   <p>The string to be trimmed</p>
11468
     * @param string|null $chars [optional] <p>Optional characters to be stripped</p>
11469
     *
11470
     * @return string the trimmed string
11471
     */
11472 55
    public static function trim(string $str = '', string $chars = null): string
11473
    {
11474 55
        if ($str === '') {
11475 9
            return '';
11476
        }
11477
11478 48
        if ($chars) {
11479 27
            $chars = \preg_quote($chars, '/');
11480 27
            $pattern = "^[${chars}]+|[${chars}]+\$";
11481
        } else {
11482 21
            $pattern = "^[\s]+|[\s]+\$";
11483
        }
11484
11485 48
        if (self::$SUPPORT['mbstring'] === true) {
11486
            /** @noinspection PhpComposerExtensionStubsInspection */
11487 48
            return (string) \mb_ereg_replace($pattern, '', $str);
11488
        }
11489
11490 8
        return self::regex_replace($str, $pattern, '', '', '/');
11491
    }
11492
11493
    /**
11494
     * Makes string's first char uppercase.
11495
     *
11496
     * @param string      $str                   <p>The input string.</p>
11497
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
11498
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
11499
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
11500
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
11501
     *
11502
     * @return string the resulting string
11503
     */
11504 69
    public static function ucfirst(
11505
        string $str,
11506
        string $encoding = 'UTF-8',
11507
        bool $cleanUtf8 = false,
11508
        string $lang = null,
11509
        bool $tryToKeepStringLength = false
11510
    ): string {
11511 69
        if ($str === '') {
11512 3
            return '';
11513
        }
11514
11515 68
        if ($cleanUtf8 === true) {
11516
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
11517
            // if invalid characters are found in $haystack before $needle
11518 1
            $str = self::clean($str);
11519
        }
11520
11521 68
        $useMbFunction = $lang === null && $tryToKeepStringLength === false;
11522
11523 68
        if ($encoding === 'UTF-8') {
11524 22
            $strPartTwo = (string) \mb_substr($str, 1);
11525
11526 22
            if ($useMbFunction === true) {
11527 22
                $strPartOne = \mb_strtoupper(
11528 22
                    (string) \mb_substr($str, 0, 1)
11529
                );
11530
            } else {
11531
                $strPartOne = self::strtoupper(
11532
                    (string) \mb_substr($str, 0, 1),
11533
                    $encoding,
11534
                    false,
11535
                    $lang,
11536 22
                    $tryToKeepStringLength
11537
                );
11538
            }
11539
        } else {
11540 47
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
11541
11542 47
            $strPartTwo = (string) self::substr($str, 1, null, $encoding);
11543
11544 47
            if ($useMbFunction === true) {
11545 47
                $strPartOne = \mb_strtoupper(
11546 47
                    (string) \mb_substr($str, 0, 1, $encoding),
11547 47
                    $encoding
11548
                );
11549
            } else {
11550
                $strPartOne = self::strtoupper(
11551
                    (string) self::substr($str, 0, 1, $encoding),
11552
                    $encoding,
11553
                    false,
11554
                    $lang,
11555
                    $tryToKeepStringLength
11556
                );
11557
            }
11558
        }
11559
11560 68
        return $strPartOne . $strPartTwo;
11561
    }
11562
11563
    /**
11564
     * alias for "UTF8::ucfirst()"
11565
     *
11566
     * @see UTF8::ucfirst()
11567
     *
11568
     * @param string $str
11569
     * @param string $encoding
11570
     * @param bool   $cleanUtf8
11571
     *
11572
     * @return string
11573
     */
11574 1
    public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
11575
    {
11576 1
        return self::ucfirst($str, $encoding, $cleanUtf8);
11577
    }
11578
11579
    /**
11580
     * Uppercase for all words in the string.
11581
     *
11582
     * @param string   $str        <p>The input string.</p>
11583
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
11584
     * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new
11585
     *                             word.</p>
11586
     * @param string   $encoding   [optional] <p>Set the charset.</p>
11587
     * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
11588
     *
11589
     * @return string
11590
     */
11591 8
    public static function ucwords(
11592
        string $str,
11593
        array $exceptions = [],
11594
        string $charlist = '',
11595
        string $encoding = 'UTF-8',
11596
        bool $cleanUtf8 = false
11597
    ): string {
11598 8
        if (!$str) {
11599 2
            return '';
11600
        }
11601
11602
        // INFO: mb_convert_case($str, MB_CASE_TITLE);
11603
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters
11604
11605 7
        if ($cleanUtf8 === true) {
11606
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
11607
            // if invalid characters are found in $haystack before $needle
11608 1
            $str = self::clean($str);
11609
        }
11610
11611 7
        $usePhpDefaultFunctions = !(bool) ($charlist . \implode('', $exceptions));
11612
11613
        if (
11614 7
            $usePhpDefaultFunctions === true
11615
            &&
11616 7
            self::is_ascii($str) === true
11617
        ) {
11618
            return \ucwords($str);
11619
        }
11620
11621 7
        $words = self::str_to_words($str, $charlist);
11622 7
        $useExceptions = \count($exceptions) > 0;
11623
11624 7
        foreach ($words as &$word) {
11625 7
            if (!$word) {
11626 7
                continue;
11627
            }
11628
11629
            if (
11630 7
                $useExceptions === false
11631
                ||
11632 7
                !\in_array($word, $exceptions, true)
11633
            ) {
11634 7
                $word = self::ucfirst($word, $encoding);
11635
            }
11636
        }
11637
11638 7
        return \implode('', $words);
11639
    }
11640
11641
    /**
11642
     * Multi decode html entity & fix urlencoded-win1252-chars.
11643
     *
11644
     * e.g:
11645
     * 'test+test'                     => 'test test'
11646
     * 'D&#252;sseldorf'               => 'Düsseldorf'
11647
     * 'D%FCsseldorf'                  => 'Düsseldorf'
11648
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
11649
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
11650
     * 'Düsseldorf'                   => 'Düsseldorf'
11651
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
11652
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
11653
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
11654
     *
11655
     * @param string $str          <p>The input string.</p>
11656
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
11657
     *
11658
     * @return string
11659
     */
11660 4
    public static function urldecode(string $str, bool $multi_decode = true): string
11661
    {
11662 4
        if ($str === '') {
11663 3
            return '';
11664
        }
11665
11666
        if (
11667 4
            \strpos($str, '&') === false
11668
            &&
11669 4
            \strpos($str, '%') === false
11670
            &&
11671 4
            \strpos($str, '+') === false
11672
            &&
11673 4
            \strpos($str, '\u') === false
11674
        ) {
11675 3
            return self::fix_simple_utf8($str);
11676
        }
11677
11678 4
        $str = self::urldecode_unicode_helper($str);
11679
11680
        do {
11681 4
            $str_compare = $str;
11682
11683
            /**
11684
             * @psalm-suppress PossiblyInvalidArgument
11685
             */
11686 4
            $str = self::fix_simple_utf8(
11687 4
                \urldecode(
11688 4
                    self::html_entity_decode(
11689 4
                        self::to_utf8($str),
11690 4
                        \ENT_QUOTES | \ENT_HTML5
11691
                    )
11692
                )
11693
            );
11694 4
        } while ($multi_decode === true && $str_compare !== $str);
11695
11696 4
        return $str;
11697
    }
11698
11699
    /**
11700
     * Return a array with "urlencoded"-win1252 -> UTF-8
11701
     *
11702
     * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
11703
     *
11704
     * @return string[]
11705
     */
11706 2
    public static function urldecode_fix_win1252_chars(): array
11707
    {
11708
        return [
11709 2
            '%20' => ' ',
11710
            '%21' => '!',
11711
            '%22' => '"',
11712
            '%23' => '#',
11713
            '%24' => '$',
11714
            '%25' => '%',
11715
            '%26' => '&',
11716
            '%27' => "'",
11717
            '%28' => '(',
11718
            '%29' => ')',
11719
            '%2A' => '*',
11720
            '%2B' => '+',
11721
            '%2C' => ',',
11722
            '%2D' => '-',
11723
            '%2E' => '.',
11724
            '%2F' => '/',
11725
            '%30' => '0',
11726
            '%31' => '1',
11727
            '%32' => '2',
11728
            '%33' => '3',
11729
            '%34' => '4',
11730
            '%35' => '5',
11731
            '%36' => '6',
11732
            '%37' => '7',
11733
            '%38' => '8',
11734
            '%39' => '9',
11735
            '%3A' => ':',
11736
            '%3B' => ';',
11737
            '%3C' => '<',
11738
            '%3D' => '=',
11739
            '%3E' => '>',
11740
            '%3F' => '?',
11741
            '%40' => '@',
11742
            '%41' => 'A',
11743
            '%42' => 'B',
11744
            '%43' => 'C',
11745
            '%44' => 'D',
11746
            '%45' => 'E',
11747
            '%46' => 'F',
11748
            '%47' => 'G',
11749
            '%48' => 'H',
11750
            '%49' => 'I',
11751
            '%4A' => 'J',
11752
            '%4B' => 'K',
11753
            '%4C' => 'L',
11754
            '%4D' => 'M',
11755
            '%4E' => 'N',
11756
            '%4F' => 'O',
11757
            '%50' => 'P',
11758
            '%51' => 'Q',
11759
            '%52' => 'R',
11760
            '%53' => 'S',
11761
            '%54' => 'T',
11762
            '%55' => 'U',
11763
            '%56' => 'V',
11764
            '%57' => 'W',
11765
            '%58' => 'X',
11766
            '%59' => 'Y',
11767
            '%5A' => 'Z',
11768
            '%5B' => '[',
11769
            '%5C' => '\\',
11770
            '%5D' => ']',
11771
            '%5E' => '^',
11772
            '%5F' => '_',
11773
            '%60' => '`',
11774
            '%61' => 'a',
11775
            '%62' => 'b',
11776
            '%63' => 'c',
11777
            '%64' => 'd',
11778
            '%65' => 'e',
11779
            '%66' => 'f',
11780
            '%67' => 'g',
11781
            '%68' => 'h',
11782
            '%69' => 'i',
11783
            '%6A' => 'j',
11784
            '%6B' => 'k',
11785
            '%6C' => 'l',
11786
            '%6D' => 'm',
11787
            '%6E' => 'n',
11788
            '%6F' => 'o',
11789
            '%70' => 'p',
11790
            '%71' => 'q',
11791
            '%72' => 'r',
11792
            '%73' => 's',
11793
            '%74' => 't',
11794
            '%75' => 'u',
11795
            '%76' => 'v',
11796
            '%77' => 'w',
11797
            '%78' => 'x',
11798
            '%79' => 'y',
11799
            '%7A' => 'z',
11800
            '%7B' => '{',
11801
            '%7C' => '|',
11802
            '%7D' => '}',
11803
            '%7E' => '~',
11804
            '%7F' => '',
11805
            '%80' => '`',
11806
            '%81' => '',
11807
            '%82' => '‚',
11808
            '%83' => 'ƒ',
11809
            '%84' => '„',
11810
            '%85' => '…',
11811
            '%86' => '†',
11812
            '%87' => '‡',
11813
            '%88' => 'ˆ',
11814
            '%89' => '‰',
11815
            '%8A' => 'Š',
11816
            '%8B' => '‹',
11817
            '%8C' => 'Œ',
11818
            '%8D' => '',
11819
            '%8E' => 'Ž',
11820
            '%8F' => '',
11821
            '%90' => '',
11822
            '%91' => '‘',
11823
            '%92' => '’',
11824
            '%93' => '“',
11825
            '%94' => '”',
11826
            '%95' => '•',
11827
            '%96' => '–',
11828
            '%97' => '—',
11829
            '%98' => '˜',
11830
            '%99' => '™',
11831
            '%9A' => 'š',
11832
            '%9B' => '›',
11833
            '%9C' => 'œ',
11834
            '%9D' => '',
11835
            '%9E' => 'ž',
11836
            '%9F' => 'Ÿ',
11837
            '%A0' => '',
11838
            '%A1' => '¡',
11839
            '%A2' => '¢',
11840
            '%A3' => '£',
11841
            '%A4' => '¤',
11842
            '%A5' => '¥',
11843
            '%A6' => '¦',
11844
            '%A7' => '§',
11845
            '%A8' => '¨',
11846
            '%A9' => '©',
11847
            '%AA' => 'ª',
11848
            '%AB' => '«',
11849
            '%AC' => '¬',
11850
            '%AD' => '',
11851
            '%AE' => '®',
11852
            '%AF' => '¯',
11853
            '%B0' => '°',
11854
            '%B1' => '±',
11855
            '%B2' => '²',
11856
            '%B3' => '³',
11857
            '%B4' => '´',
11858
            '%B5' => 'µ',
11859
            '%B6' => '¶',
11860
            '%B7' => '·',
11861
            '%B8' => '¸',
11862
            '%B9' => '¹',
11863
            '%BA' => 'º',
11864
            '%BB' => '»',
11865
            '%BC' => '¼',
11866
            '%BD' => '½',
11867
            '%BE' => '¾',
11868
            '%BF' => '¿',
11869
            '%C0' => 'À',
11870
            '%C1' => 'Á',
11871
            '%C2' => 'Â',
11872
            '%C3' => 'Ã',
11873
            '%C4' => 'Ä',
11874
            '%C5' => 'Å',
11875
            '%C6' => 'Æ',
11876
            '%C7' => 'Ç',
11877
            '%C8' => 'È',
11878
            '%C9' => 'É',
11879
            '%CA' => 'Ê',
11880
            '%CB' => 'Ë',
11881
            '%CC' => 'Ì',
11882
            '%CD' => 'Í',
11883
            '%CE' => 'Î',
11884
            '%CF' => 'Ï',
11885
            '%D0' => 'Ð',
11886
            '%D1' => 'Ñ',
11887
            '%D2' => 'Ò',
11888
            '%D3' => 'Ó',
11889
            '%D4' => 'Ô',
11890
            '%D5' => 'Õ',
11891
            '%D6' => 'Ö',
11892
            '%D7' => '×',
11893
            '%D8' => 'Ø',
11894
            '%D9' => 'Ù',
11895
            '%DA' => 'Ú',
11896
            '%DB' => 'Û',
11897
            '%DC' => 'Ü',
11898
            '%DD' => 'Ý',
11899
            '%DE' => 'Þ',
11900
            '%DF' => 'ß',
11901
            '%E0' => 'à',
11902
            '%E1' => 'á',
11903
            '%E2' => 'â',
11904
            '%E3' => 'ã',
11905
            '%E4' => 'ä',
11906
            '%E5' => 'å',
11907
            '%E6' => 'æ',
11908
            '%E7' => 'ç',
11909
            '%E8' => 'è',
11910
            '%E9' => 'é',
11911
            '%EA' => 'ê',
11912
            '%EB' => 'ë',
11913
            '%EC' => 'ì',
11914
            '%ED' => 'í',
11915
            '%EE' => 'î',
11916
            '%EF' => 'ï',
11917
            '%F0' => 'ð',
11918
            '%F1' => 'ñ',
11919
            '%F2' => 'ò',
11920
            '%F3' => 'ó',
11921
            '%F4' => 'ô',
11922
            '%F5' => 'õ',
11923
            '%F6' => 'ö',
11924
            '%F7' => '÷',
11925
            '%F8' => 'ø',
11926
            '%F9' => 'ù',
11927
            '%FA' => 'ú',
11928
            '%FB' => 'û',
11929
            '%FC' => 'ü',
11930
            '%FD' => 'ý',
11931
            '%FE' => 'þ',
11932
            '%FF' => 'ÿ',
11933
        ];
11934
    }
11935
11936
    /**
     * Rewrites "%uXXXX" escapes as hexadecimal HTML entities.
     *
     * Every "%u" followed by 3 or 4 hex digits becomes "&#xXXXX;"; the rest of
     * the string is returned unchanged. The entities are not decoded here.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *                <p>The input with each "%uXXXX" sequence replaced by "&#xXXXX;".</p>
     */
    private static function urldecode_unicode_helper(string $str): string
    {
        $unicodeEscape = '/%u([0-9a-fA-F]{3,4})/';

        // No escape found (or the match itself failed): return the input as it is.
        if (!\preg_match($unicodeEscape, $str)) {
            return $str;
        }

        return (string) \preg_replace($unicodeEscape, '&#x\\1;', $str);
    }
11950
11951
    /**
     * Decodes an UTF-8 string to ISO-8859-1.
     *
     * The string is rewritten in place, byte by byte:
     * - a 2-byte sequence becomes the single byte of its code point when that
     *   code point is below 256, otherwise "?"
     * - a 3-byte or 4-byte sequence becomes "?"
     * - a 2-byte lead byte that is the last byte of the input (truncated
     *   sequence) becomes "?"
     * - every other byte is copied unchanged
     *
     * @param string $str           <p>The input string.</p>
     * @param bool   $keepUtf8Chars <p>
     *                              If true, the original string is returned whenever the decoded
     *                              result is not shorter (in characters, as counted by
     *                              self::strlen()) than the input.
     *                              </p>
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // save for later comparison
        $str_backup = $str;
        $len = \strlen($str);

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $noCharFound = '?';
        // $i is the read position, $j the write position; $j never overtakes $i,
        // so overwriting $str in place cannot clobber bytes that are still unread.
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            // The high nibble of the lead byte identifies the sequence length.
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    if ($i + 1 >= $len) {
                        // Truncated 2-byte sequence: there is no continuation byte to read.
                        $str[$j] = $noCharFound;

                        break;
                    }

                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // 4-byte sequence: skip one extra byte, then share the 3-byte handling.
                    ++$i;

                // no break

                case "\xE0":
                    $str[$j] = $noCharFound;
                    $i += 2;

                    break;

                default:
                    $str[$j] = $str[$i];
            }
        }

        // Only the first $j bytes hold decoded output; the tail is stale input.
        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keepUtf8Chars === true
            &&
            self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
12020
12021
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * Delegates to the global utf8_encode(); an empty input short-circuits and
     * a false result (possible with a polyfill) is mapped to an empty string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $encoded = \utf8_encode($str);

        // the polyfill maybe return false
        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        return $encoded === false ? '' : $encoded;
    }
12045
12046
    /**
     * Fixes "utf8-win1252" characters.
     *
     * Pure alias: the work is done by self::fix_simple_utf8().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
12059
12060
    /**
     * Returns the table of all utf8 whitespace characters.
     *
     * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @author: Derek E. [email protected]
     *
     * @return string[]
     *                  The content of self::$WHITESPACE_TABLE: all known whitespace characters as values
     *                  and the type of whitespace as keys, as defined in the URL above
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
12075
12076
    /**
     * Limit the number of words in a string.
     *
     * A "word" is a run of non-whitespace characters. When the string holds more
     * than $limit words it is cut after the last allowed word, trailing whitespace
     * is removed and $strAddOn is appended; otherwise it is returned unchanged.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $limit    <p>The limit of words as integer; values below 1 yield an empty string.</p>
     * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
     *
     * @return string
     */
    public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
    {
        if ($limit < 1 || $str === '') {
            return '';
        }

        // Optional leading whitespace followed by at most $limit words.
        $firstWordsPattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
        \preg_match($firstWordsPattern, $str, $matches);

        // No match at all: nothing to shorten.
        if (!isset($matches[0])) {
            return $str;
        }

        // The match covers the whole string: it already fits the limit.
        if ((int) \mb_strlen($matches[0]) === \mb_strlen($str)) {
            return $str;
        }

        return \rtrim($matches[0]) . $strAddOn;
    }
12103
12104
    /**
     * Wraps a string to a given number of characters
     *
     * The wrapping positions are computed by the native wordwrap() on a
     * single-byte "mask" of the input (one mask byte per character), and the
     * real characters are then re-emitted along that mask.
     *
     * @see  http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @return string
     *                <p>The given string wrapped at the specified column; an empty string
     *                if $str or $break is empty.</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($break === '' || $str === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }

        // $chars holds the real characters (and existing breaks);
        // $mask mirrors it with one byte per entry: ' ' for a space,
        // '#' for an existing break and '?' for anything else.
        $chars = [];
        $mask = '';
        foreach ($segments as $segmentIndex => $segment) {
            if ($segmentIndex) {
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $mask = \wordwrap($mask, $width, '#', $cut);

        $wrapped = '';
        $charIndex = 0;
        $maskIndex = -1;
        $breakPos = -1;

        while (true) {
            $breakPos = \mb_strpos($mask, '#', $breakPos + 1);
            if ($breakPos === false) {
                break;
            }

            // Copy every character that sits before the next break marker.
            ++$maskIndex;
            while ($maskIndex < $breakPos) {
                $wrapped .= $chars[$charIndex];
                unset($chars[$charIndex]);
                ++$charIndex;
                ++$maskIndex;
            }

            // An existing break or a space at the marker position is replaced by
            // the break itself; otherwise the break was inserted and nothing is dropped.
            if (
                $chars[$charIndex] === $break
                ||
                $chars[$charIndex] === ' '
            ) {
                unset($chars[$charIndex]);
                ++$charIndex;
            }

            $wrapped .= $break;
        }

        // Whatever is left belongs to the last line.
        return $wrapped . \implode('', $chars);
    }
12174
12175
    /**
     * Line-Wrap the string after $limit, but also after the next word.
     *
     * The input is split on "\r\n", "\r" and "\n"; every resulting line is
     * wrapped on its own with the multi-byte aware self::wordwrap() (so the
     * width is counted in characters, not bytes) and terminated with "\n".
     * The result therefore always ends with a line break.
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $limit <p>The column width passed to self::wordwrap().</p>
     *
     * @return string
     *                <p>The wrapped lines, each followed by "\n"; an empty string if splitting fails.</p>
     */
    public static function wordwrap_per_line(string $str, int $limit): string
    {
        $lines = \preg_split('/\\r\\n|\\r|\\n/', $str);
        if ($lines === false) {
            return '';
        }

        $wrapped = '';
        foreach ($lines as $line) {
            $wrapped .= self::wordwrap($line, $limit) . "\n";
        }

        return $wrapped;
    }
12199
12200
    /**
     * Returns an array of Unicode White Space characters.
     *
     * @return string[]
     *                  The content of self::$WHITESPACE: numeric code point as key and White Space Character as value
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
12209
12210
    /**
     * Fills the emoji lookup caches on first use.
     *
     * Loads the "emoji" data file if needed, orders it by key length (longest
     * first) and derives the key, value and "reversible placeholder" caches.
     *
     * @return true|null
     *                   <p>true if the caches were built by this call, null if they already existed.</p>
     */
    private static function initEmojiData()
    {
        // The caches were built by an earlier call.
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        // Longest keys first (byte length) - presumably so that longer sequences
        // are replaced before shorter ones contained in them.
        $byLengthDesc = static function (string $a, string $b): int {
            return \strlen($b) <=> \strlen($a);
        };
        \uksort(self::$EMOJI, $byLengthDesc);

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = \array_values(self::$EMOJI);

        // One placeholder per key, built from the key's CRC32 and its reversed digits.
        foreach (self::$EMOJI_KEYS_CACHE as $emojiKey) {
            $checksum = \crc32($emojiKey);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev((string) $checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
12240
12241
    /**
     * Applies the case-folding replacement tables to a string.
     *
     * The "common" table (self::$COMMON_CASE_FOLD) is always applied; the
     * "full" table (data file "caseFolding_full") only when $fullCaseFold is set.
     *
     * @param string $str
     * @param bool   $useLower     <p>Only if exactly true: replace upper with lower, otherwise lower with upper.</p>
     * @param bool   $fullCaseFold <p>Convert not only common cases.</p>
     *
     * @return string
     */
    private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
    {
        $toLower = $useLower === true;

        $commonUpper = self::$COMMON_CASE_FOLD['upper'];
        $commonLower = self::$COMMON_CASE_FOLD['lower'];

        $str = $toLower
            ? \str_replace($commonUpper, $commonLower, $str)
            : \str_replace($commonLower, $commonUpper, $str);

        if (!$fullCaseFold) {
            return $str;
        }

        // Loaded once per request and kept in a function-static variable.
        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        // Index 0 is replaced by index 1 when lower-casing, and the other way round otherwise.
        if ($toLower) {
            return \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str);
        }

        return \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
    }
12282
12283
    /**
     * Loads a data file from "/data/*.php" next to this class.
     *
     * The file is included unconditionally; use getDataIfExists() when the
     * file may be missing.
     *
     * @param string $file <p>The file name without directory and without the ".php" extension.</p>
     *
     * @return array
     *               <p>The value returned by the included file.</p>
     */
    private static function getData(string $file): array
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
12297
12298
    /**
     * Loads a data file from "/data/*.php" next to this class, if it exists.
     *
     * @param string $file <p>The file name without directory and without the ".php" extension.</p>
     *
     * @return false|mixed
     *                     <p>The value returned by the included file, or false if the file does not exist.</p>
     */
    private static function getDataIfExists(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        if (!\file_exists($path)) {
            return false;
        }

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        return include $path;
    }
12316
12317
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * @return bool
     *              <p>true if the MB_OVERLOAD_STRING constant exists and its bit is set in the
     *              "mbstring.func_overload" INI value, false otherwise.</p>
     */
    private static function mbstring_overloaded(): bool
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */

        // Without the constant there is nothing to test the INI value against.
        /** @noinspection PhpComposerExtensionStubsInspection */
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $funcOverload = (int) @\ini_get('mbstring.func_overload');

        return (bool) ($funcOverload & \MB_OVERLOAD_STRING);
    }
12334
12335
    /**
     * Filters a list of strings and re-indexes the result.
     *
     * @param array    $strings           <p>The strings to filter.</p>
     * @param bool     $removeEmptyValues <p>If true, drop values that are empty after trim().</p>
     * @param int|null $removeShortValues <p>
     *                                    If not null, drop values whose length (mb_strlen) is less than
     *                                    or equal to this number.
     *                                    </p>
     *
     * @return array
     *               <p>The remaining values with consecutive keys starting at 0.</p>
     */
    private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
    {
        $isKept = static function ($value) use ($removeEmptyValues, $removeShortValues): bool {
            // The length check runs first, exactly as the whitespace check follows it.
            if (
                $removeShortValues !== null
                &&
                \mb_strlen($value) <= $removeShortValues
            ) {
                return false;
            }

            return !($removeEmptyValues === true && \trim($value) === '');
        };

        return \array_values(\array_filter($strings, $isKept));
    }
12369
12370
    /**
     * Builds a regular-expression fragment that matches any of the given characters.
     *
     * Characters of at most two bytes are quoted with preg_quote() and collected
     * in one bracket class (a "-" is moved to the front of the class); longer
     * single characters are appended unquoted; anything else becomes its own
     * alternative. Results are memoized per "$s . $class" combination.
     *
     * @param string $s     <p>The characters to match.</p>
     * @param string $class <p>Extra content placed at the start of the bracket class.</p>
     *
     * @return string
     *                <p>Either a single "[...]" class or a "(?:...|...)" group of alternatives.</p>
     */
    private static function rxClass(string $s, string $class = ''): string
    {
        static $RX_CLASS_CACHE = [];

        $cacheKey = $s . $class;

        if (isset($RX_CLASS_CACHE[$cacheKey])) {
            return $RX_CLASS_CACHE[$cacheKey];
        }

        $classArray = [$class];

        // Iterate by value with a dedicated variable: the split result is a
        // temporary (not a reference target) and $s must not be overwritten.
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                $classArray[0] = '-' . $classArray[0];
            } elseif (!isset($char[2])) {
                // At most two bytes long.
                $classArray[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                $classArray[0] .= $char;
            } else {
                $classArray[] = $char;
            }
        }

        // NOTE: truthiness check kept as-is - a class consisting only of "0" stays unbracketed.
        if ($classArray[0]) {
            $classArray[0] = '[' . $classArray[0] . ']';
        }

        if (\count($classArray) === 1) {
            $return = $classArray[0];
        } else {
            $return = '(?:' . \implode('|', $classArray) . ')';
        }

        $RX_CLASS_CACHE[$cacheKey] = $return;

        return $return;
    }
12418
12419
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * Splits $names at $delimiter and upper-cases the first character of every
     * part, except for parts that
     * - are exactly one of the special "names" (e.g. "van", "von", "de"),
     * - start with one of the special "prefixes" (e.g. "mc", "d'"), or
     * - start with one of the special "names" while the delimiter is "-".
     *
     * @param string $names     <p>The name(s) to process.</p>
     * @param string $delimiter <p>The separator used to split and re-join the parts.</p>
     * @param string $encoding  <p>The encoding passed to self::strpos().</p>
     *
     * @return string
     */
    private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
    {
        // init
        $namesArray = \explode($delimiter, $names);

        if ($namesArray === false) {
            return '';
        }

        $specialCases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // Beginnings that keep a part untouched; the special names only count
        // as beginnings when the parts are separated by "-".
        $skipBeginnings = $specialCases['prefixes'];
        if ($delimiter === '-') {
            $skipBeginnings = \array_merge($specialCases['names'], $skipBeginnings);
        }

        foreach ($namesArray as $index => $name) {
            if (\in_array($name, $specialCases['names'], true)) {
                continue;
            }

            foreach ($skipBeginnings as $beginning) {
                if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                    // One matching beginning is enough: leave this part as it is.
                    continue 2;
                }
            }

            $namesArray[$index] = self::ucfirst($name);
        }

        return \implode($delimiter, $namesArray);
    }
12509
12510
    /**
     * Generic case sensitive transformation for collation matching.
     *
     * The string is decomposed (Unicode NFD) and all non-spacing marks
     * (\p{Mn}) are removed, so "é" becomes "e".
     *
     * @param string $str <p>The input string</p>
     *
     * @return string|null
     *                     <p>The folded string; an empty string if the input cannot be normalized;
     *                     null if the replacement itself fails.</p>
     */
    private static function strtonatfold(string $str)
    {
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        // normalize() reports failure (e.g. invalid UTF-8) with false, which must
        // not be handed to preg_replace() as the subject.
        if ($decomposed === false) {
            return '';
        }

        return \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
12525
12526
    /**
     * Converts a single byte to its UTF-8 representation.
     *
     * Bytes listed in the Windows-1252 special-case table are mapped through
     * that table; every other byte is encoded as a two-byte UTF-8 sequence.
     *
     * @param int|string $input <p>The byte to convert (used as key of the "ord" lookup table).</p>
     *
     * @return string
     */
    private static function to_utf8_convert_helper($input): string
    {
        // init
        $buf = '';

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // The bitwise operators below work on strings on purpose (byte-wise
            // AND/OR): lead byte = 0xC0 | (ord >> 6), trail byte = 0x80 | (ord & 0x3F).
            // The division result is cast explicitly so that no float is used as array key.
            $cc1 = self::$CHR[(int) ($ordC1 / 64)] | "\xC0";
            $cc2 = ((string) $input & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
12559
}
12560