Passed
Push — master ( 938352...ae54fc )
by Lars
05:07
created

UTF8   F

Complexity

Total Complexity 1681

Size/Duplication

Total Lines 12463
Duplicated Lines 0 %

Test Coverage

Coverage 79.75%

Importance

Changes 89
Bugs 49 Features 4
Metric Value
eloc 4252
c 89
b 49
f 4
dl 0
loc 12463
ccs 2989
cts 3748
cp 0.7975
rs 0.8
wmc 1681

298 Methods

Rating   Name   Duplication   Size   Complexity  
A strchr() 0 8 1
A strcasecmp() 0 5 1
A is_bom() 0 10 3
A is_hexadecimal() 0 8 2
A is_blank() 0 8 2
A is_binary_file() 0 16 3
A is_empty() 0 3 1
A isBom() 0 3 1
A isJson() 0 3 1
A isHtml() 0 3 1
A chr_to_decimal() 0 30 6
A str_substr_after_first_separator() 0 28 6
A file_has_bom() 0 8 2
A str_begins() 0 3 1
A max() 0 14 3
B str_camelize() 0 70 10
A add_bom_to_string() 0 7 2
A parse_str() 0 16 4
A filter_input() 0 13 2
A str_contains() 0 10 2
B str_to_lines() 0 29 8
A substr_in_byte() 0 18 6
A array_change_key_case() 0 20 5
A get_unique_string() 0 15 2
A strnatcasecmp() 0 5 1
A encode_mimeheader() 0 25 5
A substr_left() 0 15 4
A count_chars() 0 11 1
D strlen() 0 96 19
A str_isubstr_last() 0 20 4
A ctype_loaded() 0 3 1
A str_replace_beginning() 0 21 6
A has_uppercase() 0 8 2
A remove_left() 0 21 4
B stripos() 0 59 11
A str_offset_exists() 0 10 2
D strrchr() 0 101 20
A to_filename() 0 9 1
A str_iends_with() 0 11 3
A max_chr_width() 0 8 2
A isBinary() 0 3 1
C utf8_decode() 0 60 13
A ltrim() 0 19 4
A emoji_decode() 0 16 2
A is_utf8() 0 13 4
A remove_html() 0 3 1
B str_longest_common_suffix() 0 51 10
C wordwrap() 0 68 14
B ucfirst() 0 57 7
A lcword() 0 8 1
A str_pad_both() 0 7 1
A str_index_last() 0 11 1
A str_substr_last() 0 33 6
A mbstring_loaded() 0 3 1
A str_limit() 0 26 6
D chr() 0 101 18
A html_escape() 0 6 1
A toUTF8() 0 3 1
A string() 0 10 1
C normalize_encoding() 0 134 14
B rxClass() 0 39 8
B get_file_type() 0 59 7
A str_ensure_right() 0 13 4
A chr_to_int() 0 3 1
B str_titleize_for_humans() 0 155 5
C is_utf16() 0 65 16
C filter() 0 59 13
A normalize_whitespace() 0 9 1
A str_starts_with() 0 11 3
A isBase64() 0 3 1
A str_humanize() 0 15 1
A is_html() 0 14 2
A decode_mimeheader() 0 15 5
C substr_count_in_byte() 0 54 15
A html_decode() 0 3 1
A strichr() 0 8 1
A isUtf32() 0 3 1
A str_index_first() 0 11 1
A strlen_in_byte() 0 12 3
A str_ireplace_ending() 0 21 6
A rtrim() 0 19 4
C str_longest_common_substring() 0 73 16
A regex_replace() 0 20 3
A chunk_split() 0 3 1
A titlecase() 0 31 5
A getData() 0 6 1
A str_iindex_first() 0 11 1
B strtolower() 0 54 10
B urldecode() 0 37 8
A str_isubstr_before_first_separator() 0 16 5
B strrev() 0 43 10
A replace_all() 0 11 2
D substr_replace() 0 124 27
A removeBOM() 0 3 1
A strstr_in_byte() 0 12 4
A emoji_encode() 0 16 2
A str_matches_pattern() 0 3 1
A is_alpha() 0 8 2
C str_titleize() 0 69 12
A ws() 0 3 1
B get_random_string() 0 53 10
A str_replace_first() 0 17 2
A fix_utf8() 0 30 4
A toLatin1() 0 3 1
A str_pad_right() 0 7 1
B ucwords() 0 48 9
A first_char() 0 11 4
A to_boolean() 0 35 5
C stristr() 0 68 15
A isUtf8() 0 3 1
A strncasecmp() 0 10 1
B strwidth() 0 40 8
A str_iends() 0 3 1
A css_stripe_media_queries() 0 6 1
A trim() 0 19 4
A clean() 0 47 6
A is_serialized() 0 11 3
A str_upper_camelize() 0 8 1
A is_uppercase() 0 8 2
A substr_compare() 0 33 6
C substr_count() 0 62 16
A strnatcmp() 0 9 2
D str_pad() 0 146 16
A urldecode_unicode_helper() 0 8 2
A str_ireplace() 0 18 3
A to_latin1() 0 3 1
A str_replace_ending() 0 21 6
A string_has_bom() 0 10 3
B strtr() 0 34 8
A str_contains_all() 0 23 6
A is_ascii() 0 3 1
A normalize_line_ending() 0 3 1
A str_isubstr_after_last_separator() 0 23 5
D range() 0 65 23
B strspn() 0 30 10
A str_transliterate() 0 3 1
B rawurldecode() 0 37 8
A str_ends() 0 3 1
B str_capitalize_name_helper() 0 80 10
A utf8_encode() 0 16 3
A normalize_msword() 0 3 1
C str_detect_encoding() 0 111 13
A spaces_to_tabs() 0 11 3
A str_istarts_with() 0 11 3
A str_replace() 0 14 1
A substr_iright() 0 15 4
D getCharDirection() 0 105 118
A htmlspecialchars() 0 11 3
A replace() 0 11 2
A filter_var_array() 0 9 2
A __construct() 0 2 1
A decimal_to_chr() 0 3 1
A to_iso8859() 0 16 4
A words_limit() 0 17 5
A strip_tags() 0 15 4
A pcre_utf8_support() 0 4 1
B between() 0 48 8
A str_isubstr_before_last_separator() 0 21 6
D str_truncate_safe() 0 78 18
A codepoints() 0 29 4
A substr_right() 0 31 6
A lowerCaseFirst() 0 8 1
D str_split() 0 125 28
A str_ends_with_any() 0 13 4
A chr_map() 0 5 1
A strrpos_in_byte() 0 12 4
A cleanup() 0 25 2
F strrpos() 0 118 25
A remove_right() 0 22 4
A remove_html_breaks() 0 3 1
A showSupport() 0 8 2
A char_at() 0 7 2
A remove_invisible_characters() 0 9 1
A single_chr_html_encode() 0 15 4
A chars() 0 3 1
A str_replace_last() 0 19 2
A str_iindex_last() 0 11 1
A str_substr_before_last_separator() 0 31 6
B is_binary() 0 35 9
A intlChar_loaded() 0 3 1
B strtocasefold() 0 33 7
A lcfirst() 0 44 5
A tabs_to_spaces() 0 11 3
A finfo_loaded() 0 3 1
B str_truncate() 0 44 7
D strripos() 0 96 19
A strpos_in_byte() 0 12 4
A str_ends_with() 0 11 3
A fits_inside() 0 3 1
A to_ascii() 0 3 1
A intl_loaded() 0 3 1
A reduce_string_array() 0 26 6
B str_longest_common_prefix() 0 48 8
A mbstring_overloaded() 0 11 2
A str_pad_left() 0 7 1
A str_substr_first() 0 33 6
A html_stripe_empty_tags() 0 6 1
A chr_size_list() 0 17 3
A remove_bom() 0 21 5
A str_repeat() 0 5 1
A strpbrk() 0 11 4
A whitespace_table() 0 3 1
A substr_count_simple() 0 31 6
A str_iends_with_any() 0 13 4
A str_substr_after_last_separator() 0 28 6
A str_isubstr_after_first_separator() 0 23 5
F extract_text() 0 175 34
A json_loaded() 0 3 1
B str_snakeize() 0 55 6
A int_to_chr() 0 3 1
A is_lowercase() 0 8 2
A str_sort() 0 15 3
D to_utf8() 0 117 35
A ucword() 0 3 1
A str_underscored() 0 3 1
A str_offset_get() 0 14 4
A strip_whitespace() 0 7 2
A hasBom() 0 3 1
A toAscii() 0 3 1
A str_ibegins() 0 3 1
A str_capitalize_name() 0 8 1
B str_limit_after_word() 0 53 11
A iconv_loaded() 0 3 1
A lcwords() 0 31 6
A str_upper_first() 0 13 1
A isAscii() 0 3 1
A normalizeEncoding() 0 3 1
A swapCase() 0 17 4
A filter_var() 0 9 2
A substr_ileft() 0 15 4
B html_encode() 0 42 7
A str_dasherize() 0 3 1
A isUtf16() 0 3 1
A str_ensure_left() 0 11 3
F encode() 0 139 37
B urldecode_fix_win1252_chars() 0 227 1
A toIso8859() 0 3 1
C is_utf32() 0 65 16
C ord() 0 72 16
A is_alphanumeric() 0 8 2
A strtonatfold() 0 6 1
A json_decode() 0 14 2
A fix_simple_utf8() 0 19 4
C strcspn() 0 51 12
A checkForSupport() 0 47 4
B is_json() 0 27 8
A fixStrCaseHelper() 0 33 5
A int_to_hex() 0 7 2
B str_split_pattern() 0 49 11
D strstr() 0 92 18
A has_lowercase() 0 8 2
A json_encode() 0 10 2
A str_isubstr_first() 0 25 4
A is_base64() 0 16 5
A str_last_char() 0 13 4
A str_ireplace_beginning() 0 21 6
A hex_to_int() 0 14 3
A htmlentities() 0 23 3
A hex_to_chr() 0 3 1
A str_substr_before_first_separator() 0 32 6
F substr() 0 143 32
A wordwrap_per_line() 0 28 5
A str_surround() 0 3 1
A strncmp() 0 19 4
A filter_input_array() 0 9 2
A str_insert() 0 28 4
A getSupportInfo() 0 13 3
A utf8_fix_win1252_chars() 0 3 1
A replace_diamond_question_mark() 0 35 5
A chr_to_hex() 0 11 3
D is_utf8_string() 0 134 28
A to_utf8_convert_helper() 0 28 5
B str_delimit() 0 33 8
B strtoupper() 0 54 10
A min() 0 14 3
A collapse_whitespace() 0 8 2
C html_entity_decode() 0 86 17
A str_starts_with_any() 0 17 5
B strrichr() 0 54 11
A split() 0 6 1
A str_istarts_with_any() 0 17 5
B str_contains_any() 0 29 8
A initEmojiData() 0 26 4
A remove_duplicates() 0 14 4
B str_slice() 0 33 10
A access() 0 11 4
F strpos() 0 131 27
A str_shuffle() 0 35 6
A strcmp() 0 9 2
B file_get_contents() 0 55 11
A str_word_count() 0 23 5
A strripos_in_byte() 0 12 4
A str_to_binary() 0 9 2
A callback() 0 3 1
A symfony_polyfill_used() 0 16 5
A binary_to_str() 0 12 3
A bom() 0 3 1
B str_to_words() 0 36 8

How to fix   Complexity   

Complex Class

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
final class UTF8
8
{
9
    // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
10
    // This regular expression is a work around for http://bugs.exim.org/1279
11
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
12
13
    /**
     * Bom => Byte-Length
     *
     * Every BOM is listed twice: once as its raw bytes and once in the form it
     * takes after having been mis-read as "WINDOWS-1252" (where a single
     * original byte may end up occupying more than one byte).
     *
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
     *
     * @var array<string, int>
     */
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        // Written with byte escapes so the key cannot be silently stripped by
        // editors / tooling that treat the visible form "ï»¿" as a real BOM.
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
32
33
    /**
34
     * Numeric code point => UTF-8 Character
35
     *
36
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
37
     *
38
     * @var array
39
     */
40
    private static $WHITESPACE = [
41
        // NUL Byte
42
        0 => "\x0",
43
        // Tab
44
        9 => "\x9",
45
        // New Line
46
        10 => "\xa",
47
        // Vertical Tab
48
        11 => "\xb",
49
        // Carriage Return
50
        13 => "\xd",
51
        // Ordinary Space
52
        32 => "\x20",
53
        // NO-BREAK SPACE
54
        160 => "\xc2\xa0",
55
        // OGHAM SPACE MARK
56
        5760 => "\xe1\x9a\x80",
57
        // MONGOLIAN VOWEL SEPARATOR
58
        6158 => "\xe1\xa0\x8e",
59
        // EN QUAD
60
        8192 => "\xe2\x80\x80",
61
        // EM QUAD
62
        8193 => "\xe2\x80\x81",
63
        // EN SPACE
64
        8194 => "\xe2\x80\x82",
65
        // EM SPACE
66
        8195 => "\xe2\x80\x83",
67
        // THREE-PER-EM SPACE
68
        8196 => "\xe2\x80\x84",
69
        // FOUR-PER-EM SPACE
70
        8197 => "\xe2\x80\x85",
71
        // SIX-PER-EM SPACE
72
        8198 => "\xe2\x80\x86",
73
        // FIGURE SPACE
74
        8199 => "\xe2\x80\x87",
75
        // PUNCTUATION SPACE
76
        8200 => "\xe2\x80\x88",
77
        // THIN SPACE
78
        8201 => "\xe2\x80\x89",
79
        //HAIR SPACE
80
        8202 => "\xe2\x80\x8a",
81
        // LINE SEPARATOR
82
        8232 => "\xe2\x80\xa8",
83
        // PARAGRAPH SEPARATOR
84
        8233 => "\xe2\x80\xa9",
85
        // NARROW NO-BREAK SPACE
86
        8239 => "\xe2\x80\xaf",
87
        // MEDIUM MATHEMATICAL SPACE
88
        8287 => "\xe2\x81\x9f",
89
        // IDEOGRAPHIC SPACE
90
        12288 => "\xe3\x80\x80",
91
    ];
92
93
    /**
94
     * @var array
95
     */
96
    private static $WHITESPACE_TABLE = [
97
        'SPACE'                     => "\x20",
98
        'NO-BREAK SPACE'            => "\xc2\xa0",
99
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
100
        'EN QUAD'                   => "\xe2\x80\x80",
101
        'EM QUAD'                   => "\xe2\x80\x81",
102
        'EN SPACE'                  => "\xe2\x80\x82",
103
        'EM SPACE'                  => "\xe2\x80\x83",
104
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
105
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
106
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
107
        'FIGURE SPACE'              => "\xe2\x80\x87",
108
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
109
        'THIN SPACE'                => "\xe2\x80\x89",
110
        'HAIR SPACE'                => "\xe2\x80\x8a",
111
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
112
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
113
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
114
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
115
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
116
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
117
        'HALFWIDTH HANGUL FILLER'   => "\xef\xbe\xa0",
118
    ];
119
120
    /**
121
     * @var array
122
     */
123
    private static $COMMON_CASE_FOLD = [
124
        'upper' => [
125
            'µ',
126
            'ſ',
127
            "\xCD\x85",
128
            'ς',
129
            'ẞ',
130
            "\xCF\x90",
131
            "\xCF\x91",
132
            "\xCF\x95",
133
            "\xCF\x96",
134
            "\xCF\xB0",
135
            "\xCF\xB1",
136
            "\xCF\xB5",
137
            "\xE1\xBA\x9B",
138
            "\xE1\xBE\xBE",
139
        ],
140
        'lower' => [
141
            'μ',
142
            's',
143
            'ι',
144
            'σ',
145
            'ß',
146
            'β',
147
            'θ',
148
            'φ',
149
            'π',
150
            'κ',
151
            'ρ',
152
            'ε',
153
            "\xE1\xB9\xA1",
154
            'ι',
155
        ],
156
    ];
157
158
    /**
159
     * @var array
160
     */
161
    private static $SUPPORT = [];
162
163
    /**
164
     * @var array|null
165
     */
166
    private static $BROKEN_UTF8_FIX;
167
168
    /**
169
     * @var array|null
170
     */
171
    private static $WIN1252_TO_UTF8;
172
173
    /**
174
     * @var array|null
175
     */
176
    private static $INTL_TRANSLITERATOR_LIST;
177
178
    /**
179
     * @var array|null
180
     */
181
    private static $ENCODINGS;
182
183
    /**
184
     * @var array|null
185
     */
186
    private static $ORD;
187
188
    /**
189
     * @var array|null
190
     */
191
    private static $EMOJI;
192
193
    /**
194
     * @var array|null
195
     */
196
    private static $EMOJI_VALUES_CACHE;
197
198
    /**
199
     * @var array|null
200
     */
201
    private static $EMOJI_KEYS_CACHE;
202
203
    /**
204
     * @var array|null
205
     */
206
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
207
208
    /**
209
     * @var array|null
210
     */
211
    private static $CHR;
212
213
    /**
     * Creates an instance of the helper.
     *
     * The constructor takes no arguments and sets up no state.
     */
    public function __construct()
    {
        // intentionally empty
    }
219
220
    /**
     * Fetch a single character by position, similar to $str[1] but multi-byte aware.
     *
     * @param string $str      <p>A UTF-8 string.</p>
     * @param int    $pos      <p>Zero-based position of the wanted character.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the character at $pos, or an empty string if $str is empty
     *                or $pos is negative
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        // nothing to look up in an empty string, and negative positions are rejected
        if ($pos < 0 || $str === '') {
            return '';
        }

        // fast path: plain "mb_substr" for UTF-8, the generic helper for everything else
        return $encoding === 'UTF-8'
            ? (string) \mb_substr($str, $pos, 1)
            : (string) self::substr($str, $pos, 1, $encoding);
    }
241
242
    /**
     * Make sure the given string starts with the UTF-8 BOM.
     *
     * INFO: A string for which string_has_bom() does not report false is
     *       returned unchanged.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the input string, prefixed with the UTF-8 BOM if needed
     */
    public static function add_bom_to_string(string $str): string
    {
        if (self::string_has_bom($str) !== false) {
            // already marked, hand it back untouched
            return $str;
        }

        return self::bom() . $str;
    }
259
260
    /**
     * Changes the case of all keys in an array (multi-byte aware).
     *
     * The values are copied over untouched; only the keys are converted. If two
     * keys become identical after the conversion, the later one wins.
     *
     * @param array  $array    <p>The array to work on</p>
     * @param int    $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                         or <strong>CASE_LOWER</strong> (default); any other value
     *                         is treated as <strong>CASE_LOWER</strong></p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return array a new array with its keys lower or uppercased
     */
    public static function array_change_key_case(array $array, int $case = \CASE_LOWER, string $encoding = 'UTF-8'): array
    {
        // everything that is not explicitly "upper" falls back to "lower"
        $to_upper = $case === \CASE_UPPER;

        $return = [];
        // iterate by value: the elements are only read, so a reference would just
        // leave a dangling alias behind without any benefit
        foreach ($array as $key => $value) {
            $key = $to_upper
                ? self::strtoupper((string) $key, $encoding)
                : self::strtolower((string) $key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
291
292
    /**
     * Extracts the text that sits between the first $start delimiter (searched
     * from $offset) and the next $end delimiter after it.
     *
     * An empty string is returned if either delimiter is not found or if there
     * is nothing between the two delimiters.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            // generic path: go through the encoding-aware helpers of this class
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $start_pos = self::strpos($str, $start, $offset, $encoding);
            if ($start_pos === false) {
                return '';
            }

            // first character after the start delimiter
            $from = $start_pos + (int) self::strlen($start, $encoding);
            $end_pos = self::strpos($str, $end, $from, $encoding);
            if ($end_pos === false || $end_pos === $from) {
                return '';
            }

            return (string) self::substr($str, $from, $end_pos - $from, $encoding);
        }

        // UTF-8 fast path: use the "mb_" functions directly
        $start_pos = \mb_strpos($str, $start, $offset);
        if ($start_pos === false) {
            return '';
        }

        // first character after the start delimiter
        $from = $start_pos + (int) \mb_strlen($start);
        $end_pos = \mb_strpos($str, $end, $from);
        if ($end_pos === false || $end_pos === $from) {
            return '';
        }

        return (string) \mb_substr($str, $from, $end_pos - $from);
    }
355
356
    /**
     * Convert a string of binary digits ("0" / "1") into the bytes it represents.
     *
     * The digits are read as one big-endian number: leading zeros are ignored
     * and the remaining digits are left-padded to whole bytes. Characters other
     * than "0" and "1" are skipped. The conversion is done byte by byte, so the
     * input may be arbitrarily long (a single numeric conversion would lose
     * precision beyond the platform's float precision).
     *
     * @param mixed $bin 1|0
     *
     * @return string the decoded bytes; an empty string for non-string input,
     *                empty input or an input that only consists of zeros
     */
    public static function binary_to_str($bin): string
    {
        if (!\is_string($bin) || $bin === '') {
            return '';
        }

        // keep the binary digits only and drop leading zeros (numeric semantics)
        $bits = \ltrim((string) \preg_replace('/[^01]/', '', $bin), '0');
        if ($bits === '') {
            return '';
        }

        // left-pad to a multiple of 8 so that every chunk is exactly one byte
        $remainder = \strlen($bits) % 8;
        if ($remainder !== 0) {
            $bits = \str_repeat('0', 8 - $remainder) . $bits;
        }

        $str = '';
        foreach (\str_split($bits, 8) as $byte) {
            $str .= \chr((int) \bindec($byte));
        }

        return $str;
    }
376
377
    /**
     * Returns the three bytes of the UTF-8 Byte Order Mark.
     *
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
     *
     * @return string UTF-8 Byte Order Mark
     */
    public static function bom(): string
    {
        $utf8_bom = "\xef\xbb\xbf";

        return $utf8_bom;
    }
388
389
    /**
     * Alias of UTF8::chr_map(): forwards both arguments unchanged.
     *
     * @param array|string $callback
     * @param string       $str
     *
     * @return string[]
     *
     * @see UTF8::chr_map()
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
403
404
    /**
     * Returns the single character found at $index (indexes start at 0).
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @return string the character at $index
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        if ($encoding !== 'UTF-8') {
            // any other charset goes through the encoding-aware helper
            return (string) self::substr($str, $index, 1, $encoding);
        }

        return (string) \mb_substr($str, $index, 1);
    }
421
422
    /**
     * Splits the string into its characters by delegating to UTF8::str_split().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string[] an array of chars
     */
    public static function chars(string $str): array
    {
        $characters = self::str_split($str);

        return $characters;
    }
433
434
    /**
     * Detects once which UTF-8 related features the environment offers and
     * records the result of every probe in self::$SUPPORT.
     *
     * @return true|null true if the detection ran during this call, null if it
     *                   had already been done before
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        // the detection only has to run once
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            // switch "mbstring" over to UTF-8 and remember that we did so
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            // the polyfill provides "mb_internal_encoding" as well
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
489
490
    /**
     * Generates a UTF-8 encoded character from the given code point.
     *
     * INFO: opposite to UTF8::ord()
     *
     * Numeric strings are converted to an integer first, so "8364" and 8364
     * yield the same character on every code path. Input that is not numeric,
     * is negative or lies above U+10FFFF is treated as a failure.
     *
     * Results are memoized per code point and encoding for the lifetime of the
     * request.
     *
     * @param int|string $code_point <p>The code point for which to generate a character.</p>
     * @param string     $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @return string|null multi-byte character, returns null on failure or empty input
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // init
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // Normalize the code point to an integer up front: "IntlChar::chr()"
        // interprets a string argument as a character (not as a number), and the
        // lookup table below must only be indexed with valid integers.
        if (!\is_int($code_point)) {
            if (!\is_numeric($code_point)) {
                return null;
            }

            $code_point = (int) $code_point;
        }

        // outside of the Unicode code space, there is nothing to generate
        if ($code_point < 0 || $code_point > 0x10FFFF) {
            return null;
        }

        $cacheKey = $code_point . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        if ($code_point <= 127) { // use "simple"-char only until "\x80"

            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cacheKey] = $chr;
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);
            if ($chr === null) {
                // "IntlChar" could not build the character
                return null;
            }

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cacheKey] = $chr;
        }

        //
        // fallback via vanilla php
        //

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        // code points <= 0x7F were already handled above, so only the
        // 2-, 3- and 4-byte sequences are left
        if ($code_point <= 0x7FF) {
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } elseif ($code_point <= 0xFFFF) {
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } else {
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                   self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        }

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cacheKey] = $chr;
    }
602
603
    /**
     * Runs the callback once for every character of the string and collects
     * the results.
     *
     * @param array|string $callback <p>The callback function.</p>
     * @param string       $str      <p>UTF-8 string to run callback on.</p>
     *
     * @return string[] the outcome of callback
     */
    public static function chr_map($callback, string $str): array
    {
        $chars = self::str_split($str);

        return \array_map($callback, $chars);
    }
618
619
    /**
     * Builds a list with the byte length of every character of a Unicode string.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @return int[] an array of byte lengths of each character
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        $overloaded = self::$SUPPORT['mbstring_func_overload'] === true;
        $chars = self::str_split($str);

        if ($overloaded) {
            // "mb_" is available if overload is used, so use it ...
            $byte_length = static function (string $data): int {
                return \mb_strlen($data, 'CP850'); // 8-BIT
            };
        } else {
            $byte_length = '\strlen';
        }

        return \array_map($byte_length, $chars);
    }
649
650
    /**
     * Get a decimal code representation of a specific character.
     *
     * The lead byte decides how many bytes belong to the character; the payload
     * bits of the lead byte and of the following continuation bytes are then
     * combined into the code point.
     *
     * @param string $char <p>The input character.</p>
     *
     * @return int the decimal code of the character, 0 for an empty string
     */
    public static function chr_to_decimal(string $char): int
    {
        // An empty string has no first byte to inspect; reading "$char[0]" would
        // only raise an "Uninitialized string offset" error.
        // NOTE(review): 0 is assumed to match what UTF8::ord('') yields - confirm.
        if ($char === '') {
            return 0;
        }

        $code = self::ord($char[0]);
        $bytes = 1;

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        // append the payload bits of every continuation byte
        for ($i = 2; $i <= $bytes; ++$i) {
            // 10xxxxxx
            $code = ($code << 6) + (self::ord($char[$i - 1]) & ~0x80);
        }

        return $code;
    }
688
689
    /**
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * @param int|string $char <p>The input character</p>
     * @param string     $pfix [optional] <p>Prefix handed on to UTF8::int_to_hex(), "U+" by default.</p>
     *
     * @return string The code point encoded as U+xxxx, or an empty string for empty input
     */
    public static function chr_to_hex($char, string $pfix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        // the HTML entity of the NUL byte is looked up like an empty character
        $lookup = $char === '&#0;' ? '' : (string) $char;

        $code_point = self::ord($lookup);

        return self::int_to_hex($code_point, $pfix);
    }
709
710
    /**
     * Alias for "UTF8::chr_to_decimal()".
     *
     * @param string $chr <p>The input character.</p>
     *
     * @return int the value returned by UTF8::chr_to_decimal()
     *
     * @see UTF8::chr_to_decimal()
     */
    public static function chr_to_int(string $chr): int
    {
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
723
724
    /**
     * Splits a string into smaller chunks and joins them with the specified line ending.
     *
     * INFO: the chunks come from UTF8::str_split() and the line ending is only
     *       placed between two chunks, so nothing is appended after the last one.
     *
     * @param string $body     <p>The original string to be split.</p>
     * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
     *
     * @return string the chunked string
     */
    public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunklen);

        return \implode($end, $chunks);
    }
737
738
    /**
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
     *
     * The byte filter always runs first; the optional steps follow in a fixed order:
     * diamond question mark, invisible characters, whitespace, MS Word chars, BOM.
     *
     * @param string $str                           <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
     * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
     *                                              whitespace.</p>
     * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word
     *                                              chars e.g.: "…" => "..."</p>
     * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
     *                                              combination with $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond
     *                                              question mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove
     *                                              invisible characters e.g.: "\0"</p>
     *
     * @return string clean UTF-8 encoded string
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // group 1 collects runs of well-formed byte sequences, group 2 and 3 match stray bytes
        $utf8_filter = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';

        // only group 1 is written back, so every stray byte is dropped
        /** @noinspection NotOptimalRegularExpressionsInspection */
        $cleaned = (string) \preg_replace($utf8_filter, '$1', $str);

        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned, '');
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        if ($remove_bom) {
            $cleaned = self::remove_bom($cleaned);
        }

        return $cleaned;
    }
806
807
    /**
     * Clean-up a string: fix simple UTF-8 encoding errors first, then run UTF8::clean() on the result.
     *
     * UTF8::clean() is called with these options:
     * - remove BOM
     * - normalize whitespace chars (but keep non-breaking-spaces)
     * - do not normalize MS Word chars
     * - remove diamond question mark (�)
     * - remove invisible characters (e.g. "\0")
     *
     * @param string $str <p>The input string (it is cast to string).</p>
     *
     * @return string
     */
    public static function cleanup($str): string
    {
        $input = (string) $str;
        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $repaired = self::fix_simple_utf8($input);

        // arguments: $remove_bom, $normalize_whitespace, $normalize_msword,
        //            $keep_non_breaking_space, $replace_diamond_question_mark,
        //            $remove_invisible_characters
        return self::clean($repaired, true, true, false, true, true, true);
    }
841
842
    /**
     * Accepts a string or an array of strings and returns an array of Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * A string is split via UTF8::str_split() first; every element is then
     * converted with UTF8::ord() and, for $u_style, with UTF8::int_to_hex().
     *
     * @param string|string[] $arg     <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $u_style <p>If True, will return code points in U+xxxx format,
     *                                 default, code points will be returned as integers.</p>
     *
     * @return array<int|string>
     *                           The array of code points:<br>
     *                           array<int> for $u_style === false<br>
     *                           array<string> for $u_style === true<br>
     */
    public static function codepoints($arg, bool $u_style = false): array
    {
        $chars = \is_string($arg) ? self::str_split($arg) : $arg;

        $code_points = \array_map([self::class, 'ord'], $chars);

        if (\count($code_points) === 0) {
            return [];
        }

        if (!$u_style) {
            return $code_points;
        }

        return \array_map([self::class, 'int_to_hex'], $code_points);
    }
886
887
    /**
     * Trims the string and replaces consecutive whitespace characters with a
     * single space. This includes tabs and newline characters, as well as
     * multibyte whitespace such as the thin space and ideographic space.
     *
     * The replacement uses "mb_ereg_replace()" if mbstring is flagged as
     * available, otherwise UTF8::regex_replace(); the result is passed through trim().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string with a trimmed $str and condensed whitespace
     */
    public static function collapse_whitespace(string $str): string
    {
        if (self::$SUPPORT['mbstring'] !== true) {
            return \trim(self::regex_replace($str, '[[:space:]]+', ' '));
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $condensed = (string) \mb_ereg_replace('[[:space:]]+', ' ', $str);

        return \trim($condensed);
    }
905
906
    /**
     * Returns count of characters used in a string.
     *
     * The string is split into single characters via UTF8::str_split() and the
     * occurrences of every character are counted.
     *
     * @param string $str                <p>The input string.</p>
     * @param bool   $cleanUtf8          [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool   $tryToUseMbFunction [optional] <p>This flag is handed over to UTF8::str_split()
     *                                   as it is.</p>
     *
     * @return int[] an associative array of Character as keys and
     *               their count as values
     */
    public static function count_chars(
        string $str,
        bool $cleanUtf8 = false,
        bool $tryToUseMbFunction = true
    ): array {
        $chars = self::str_split($str, 1, $cleanUtf8, $tryToUseMbFunction);

        return \array_count_values($chars);
    }
930
931
    /**
     * Remove css media-queries.
     *
     * Every "@media ... { ... }" block that matches the pattern is replaced by
     * an empty string; the surrounding text stays untouched.
     *
     * @param string $str <p>The css input.</p>
     *
     * @return string the input without the matched media-queries
     */
    public static function css_stripe_media_queries(string $str): string
    {
        $media_query_pattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $stripped = \preg_replace($media_query_pattern, '', $str);

        return (string) $stripped;
    }
946
947
    /**
     * Checks whether the "ctype" extension is loaded on the server.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function ctype_loaded(): bool
    {
        $is_loaded = \extension_loaded('ctype');

        return $is_loaded;
    }
957
958
    /**
     * Converts an int-value into a UTF-8 character.
     *
     * The value is wrapped into a numeric HTML entity ("&#...;") which is then
     * decoded by UTF8::html_entity_decode().
     *
     * @param mixed $int <p>The decimal code, it is concatenated into the entity as it is.</p>
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        $entity = '&#' . $int . ';';

        return self::html_entity_decode($entity, \ENT_QUOTES | \ENT_HTML5);
    }
969
970
    /**
     * Decodes a MIME header field.
     *
     * "iconv_mime_decode()" is used if iconv is flagged as available, otherwise
     * the string is handed to "mb_decode_mimeheader()" (after UTF8::encode() for
     * a non UTF-8 charset).
     *
     * @param string $str      <p>The encoded header.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      A decoded MIME field on success,
     *                      or false if an error occurs during the decoding
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        $is_known_charset = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_charset) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (self::$SUPPORT['iconv'] === true) {
            return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
        }

        $prepared = $encoding !== 'UTF-8' ? self::encode($encoding, $str) : $str;

        return \mb_decode_mimeheader($prepared);
    }
996
997
    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * Every entry of the selected key-cache that occurs in the string is
     * replaced by the entry of the value-cache at the same position.
     *
     * @param string $str                        <p>The input string.</p>
     * @param bool   $useReversibleStringMapping [optional] <p>
     *                                           When <b>TRUE</b>, we use a reversible string mapping
     *                                           between "emoji_encode" and "emoji_decode".</p>
     *
     * @return string
     */
    public static function emoji_decode(string $str, bool $useReversibleStringMapping = false): string
    {
        self::initEmojiData();

        $placeholders = $useReversibleStringMapping
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) $placeholders,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }
1025
1026
    /**
     * Encode a string with emoji chars into a non-emoji string.
     *
     * Every entry of the value-cache that occurs in the string is replaced by
     * the entry of the selected key-cache at the same position.
     *
     * @param string $str                        <p>The input string</p>
     * @param bool   $useReversibleStringMapping [optional] <p>
     *                                           When <b>TRUE</b>, we use a reversible string mapping
     *                                           between "emoji_encode" and "emoji_decode"</p>
     *
     * @return string
     */
    public static function emoji_encode(string $str, bool $useReversibleStringMapping = false): string
    {
        self::initEmojiData();

        $placeholders = $useReversibleStringMapping
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            (array) $placeholders,
            $str
        );
    }
1054
1055
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  The difference to "UTF8::utf8_encode()" is that this function tries to fix also broken / double
     *        encoding, so you can call this function also on a UTF-8 String and you don't mess the string.
     *
     * Besides real charsets the pseudo encodings 'JSON', 'BASE64' and 'HTML-ENTITIES' are handled:
     * as target they return the encoded string directly, as source the string is decoded first and
     * the source encoding is reset, so that it gets detected afterwards.
     *
     * @param string $toEncoding             <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                    <p>The input string</p>
     * @param bool   $autodetectFromEncoding [optional] <p>Force the new encoding (we try to fix broken / double
     *                                       encoding for UTF-8)<br> otherwise we auto-detect the current
     *                                       string-encoding</p>
     * @param string $fromEncoding           [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                       A empty string will trigger the autodetect anyway.</p>
     *
     * @throws \InvalidArgumentException if the target is 'JSON' and the string can not be json-encoded
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $toEncoding,
        string $str,
        bool $autodetectFromEncoding = true,
        string $fromEncoding = ''
    ): string {
        if ($str === '' || $toEncoding === '') {
            return $str;
        }

        if (!\in_array($toEncoding, ['UTF-8', 'CP850'], true)) {
            $toEncoding = self::normalize_encoding($toEncoding, 'UTF-8');
        }

        if ($fromEncoding && !\in_array($fromEncoding, ['UTF-8', 'CP850'], true)) {
            $fromEncoding = self::normalize_encoding($fromEncoding, null);
        }

        // source and target are already the same: nothing to do
        if ($toEncoding && $fromEncoding && $fromEncoding === $toEncoding) {
            return $str;
        }

        // pseudo encoding: JSON
        if ($toEncoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }
        if ($fromEncoding === 'JSON') {
            $str = self::json_decode($str);
            $fromEncoding = '';
        }

        // pseudo encoding: BASE64
        if ($toEncoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($fromEncoding === 'BASE64') {
            $str = \base64_decode($str, true);
            $fromEncoding = '';
        }

        // pseudo encoding: HTML-ENTITIES
        if ($toEncoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true, 'UTF-8');
        }
        if ($fromEncoding === 'HTML-ENTITIES') {
            $str = self::html_decode($str, \ENT_COMPAT, 'UTF-8');
            $fromEncoding = '';
        }

        // detect the source encoding if requested or if it is still unknown
        $detected = false;
        if ($autodetectFromEncoding || !$fromEncoding) {
            $detected = self::str_detect_encoding($str);
        }

        // DEBUG
        //var_dump($toEncoding, $fromEncoding, $detected, $str, "\n\n");

        if ($detected !== false) {
            $fromEncoding = $detected;
        } elseif ($autodetectFromEncoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (!$fromEncoding || $fromEncoding === $toEncoding) {
            return $str;
        }

        // shortcuts via the internal converters
        if (
            $toEncoding === 'UTF-8'
            &&
            \in_array($fromEncoding, ['WINDOWS-1252', 'ISO-8859-1'], true)
        ) {
            return self::to_utf8($str);
        }

        if (
            $toEncoding === 'ISO-8859-1'
            &&
            \in_array($fromEncoding, ['WINDOWS-1252', 'UTF-8'], true)
        ) {
            return self::to_iso8859($str);
        }

        if (
            self::$SUPPORT['mbstring'] === false
            &&
            !\in_array($toEncoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
        ) {
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // warning: do not use the symfony polyfill here
            $converted = \mb_convert_encoding($str, $toEncoding, $fromEncoding);

            if ($converted) {
                return $converted;
            }
        }

        // last resort: iconv, and the unchanged input if that fails too
        $converted = \iconv($fromEncoding, $toEncoding, $str);

        return $converted !== false ? $converted : $str;
    }
1213
1214
    /**
     * Encodes a string as MIME header field value via "iconv_mime_encode()" (with an empty field name).
     *
     * INFO: the default for $linefeed is a real CRLF ("\r\n"); the former single-quoted
     *       default was the four literal characters backslash, "r", backslash, "n".
     *
     * @param string $str              <p>The header value.</p>
     * @param string $fromCharset      [optional] <p>Set the input charset.</p>
     * @param string $toCharset        [optional] <p>Set the output charset.</p>
     * @param string $transferEncoding [optional] <p>Set the transfer encoding.</p>
     * @param string $linefeed         [optional] <p>Set the used linefeed.</p>
     * @param int    $indent           [optional] <p>Set the max length indent.</p>
     *
     * @return false|string
     *                      An encoded MIME field on success,
     *                      or false if an error occurs during the encoding
     */
    public static function encode_mimeheader(
        $str,
        $fromCharset = 'UTF-8',
        $toCharset = 'UTF-8',
        $transferEncoding = 'Q',
        $linefeed = "\r\n",
        $indent = 76
    ) {
        if ($fromCharset !== 'UTF-8' && $fromCharset !== 'CP850') {
            $fromCharset = self::normalize_encoding($fromCharset, 'UTF-8');
        }

        if ($toCharset !== 'UTF-8' && $toCharset !== 'CP850') {
            $toCharset = self::normalize_encoding($toCharset, 'UTF-8');
        }

        $preferences = [
            'scheme'           => $transferEncoding,
            'line-length'      => $indent,
            'input-charset'    => $fromCharset,
            'output-charset'   => $toCharset,
            'line-break-chars' => $linefeed,
        ];

        return \iconv_mime_encode('', $str, $preferences);
    }
1254
1255
    /**
     * Create an extract from a sentence, so if the search-string was found, it tries to be centered in the output.
     *
     * The cut positions are searched at the next blank (" ") or dot (".") around the
     * wanted length; skipped text is marked with $replacerForSkippedText.
     *
     * @param string   $str                    <p>The input string.</p>
     * @param string   $search                 <p>The searched string.</p>
     * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
     * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacerForSkippedText = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $is_utf8 = $encoding === 'UTF-8';

        // encoding-aware primitives: plain "mb_" functions for UTF-8, the class methods otherwise
        $char_count = static function ($text) use ($is_utf8, $encoding) {
            return $is_utf8 ? \mb_strlen($text) : self::strlen($text, $encoding);
        };
        $index_of = static function ($haystack, $needle, $offset) use ($is_utf8, $encoding) {
            return $is_utf8
                ? \mb_strpos($haystack, $needle, $offset)
                : self::strpos($haystack, $needle, $offset, $encoding);
        };
        $last_index_of = static function ($haystack, $needle) use ($is_utf8, $encoding) {
            return $is_utf8
                ? \mb_strrpos($haystack, $needle)
                : self::strrpos($haystack, $needle, 0, $encoding);
        };
        $slice = static function ($text, $start, $len) use ($is_utf8, $encoding) {
            return $is_utf8
                ? \mb_substr($text, $start, $len)
                : self::substr($text, $start, $len, $encoding);
        };
        // position of the next blank / dot from $offset on, as the int-cast result of min()
        // (a failed lookup, i.e. false, ends up as 0)
        $next_break = static function ($offset) use ($str, $index_of) {
            return (int) \min(
                $index_of($str, ' ', $offset),
                $index_of($str, '.', $offset)
            );
        };

        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2, 0);
        }

        // no search-string: cut the beginning of the text at the next break
        if ($search === '') {
            $from = 0;
            if ($length > 0) {
                $from = \min($length - 1, (int) $char_count($str));
            }

            $cut = $next_break($from);
            if (!$cut) {
                return $str;
            }

            $head = $slice($str, 0, $cut);
            if ($head === false) {
                return '';
            }

            return \rtrim($head, $trim_chars) . $replacerForSkippedText;
        }

        $word_pos = $is_utf8
            ? (int) \mb_stripos($str, $search)
            : (int) self::stripos($str, $search, 0, $encoding);
        $half_side = (int) ($word_pos - $length / 2 + (int) $char_count($search) / 2);

        // start at the last break in front of the centered window
        $start = 0;
        if ($half_side > 0) {
            $left_part = $slice($str, 0, $half_side);
            if ($left_part !== false) {
                $start = (int) \max(
                    $last_index_of($left_part, ' '),
                    $last_index_of($left_part, '.')
                );
            }
        }

        $total = (int) self::strlen($str, $encoding);

        if ($word_pos && $half_side > 0) {
            $offset = \min($start + $length - 1, $total);
            $span = $next_break($offset) - $start;

            if ($span <= 0) {
                // no break behind the window: take everything from the start position
                $tail = $slice($str, $start, (int) $char_count($str));

                return $tail !== false
                    ? $replacerForSkippedText . \ltrim($tail, $trim_chars)
                    : '';
            }

            $middle = $slice($str, $start, $span);

            return $middle !== false
                ? $replacerForSkippedText . \trim($middle, $trim_chars) . $replacerForSkippedText
                : '';
        }

        // search-string not found (or at the very beginning): extract from the beginning of the text
        $offset = \min($length - 1, $total);
        $cut = $next_break($offset);
        if (!$cut) {
            return $str;
        }

        $head = $slice($str, 0, $cut);

        return $head !== false
            ? \rtrim($head, $trim_chars) . $replacerForSkippedText
            : '';
    }
1442
1443
    /**
     * Reads entire file into a string.
     *
     * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
     *
     * INFO: the filename is passed through "filter_var()" with FILTER_SANITIZE_STRING
     *       before the file is read.
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Passed to the native function as it is, to trigger
     *                                        the include path search.
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with
     *                                        stream_context_create. With null and a non-zero
     *                                        $timeout, a context with the "http" timeout is created.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts (null === 0).
     *                                        </p>
     * @param int|null      $maxLength        [optional] <p>
     *                                        Maximum length of data read. The default is to read until end
     *                                        of file is reached.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
     * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Binary files
     *                                        like images or pdf will not be converted.</p>
     * @param string        $fromEncoding     [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        A empty string will trigger the autodetect anyway.</p>
     *
     * @return false|string the function returns the read data as string or <b>false</b> on failure
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $maxLength = null,
        int $timeout = 10,
        bool $convertToUtf8 = true,
        string $fromEncoding = ''
    ) {
        $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);
        if ($filename === false) {
            return false;
        }

        if ($timeout && $context === null) {
            $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
        }

        $start = $offset === null ? 0 : $offset;

        // the length is only passed on if the caller really set one
        $data = \is_int($maxLength)
            ? \file_get_contents($filename, $use_include_path, $context, $start, $maxLength)
            : \file_get_contents($filename, $use_include_path, $context, $start);

        // return false on error
        if ($data === false) {
            return false;
        }

        if (!$convertToUtf8) {
            return $data;
        }

        // convert everything that is not binary, plus UTF-16 / UTF-32 data
        $is_convertible = self::is_binary($data, true) !== true
            || self::is_utf16($data, false) !== false
            || self::is_utf32($data, false) !== false;

        if ($is_convertible) {
            $data = self::encode('UTF-8', $data, false, $fromEncoding);
            $data = self::cleanup($data);
        }

        return $data;
    }
1537
1538
    /**
1539
     * Checks if a file starts with BOM (Byte Order Mark) character.
1540
     *
1541
     * @param string $file_path <p>Path to a valid file.</p>
1542
     *
1543
     * @throws \RuntimeException if file_get_contents() returned false
1544
     *
1545
     * @return bool
1546
     *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
1547
     */
1548 2
    public static function file_has_bom(string $file_path): bool
    {
        $contents = \file_get_contents($file_path);

        if ($contents !== false) {
            // The actual BOM detection is delegated to the string variant.
            return self::string_has_bom($contents);
        }

        // An unreadable file is an error, not "no BOM".
        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1557
1558
    /**
1559
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1560
     *
1561
     * @param mixed  $var
1562
     * @param int    $normalization_form
1563
     * @param string $leading_combining
1564
     *
1565
     * @return mixed
1566
     */
1567 62
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        $kind = \gettype($var);

        // Containers: filter every element / property in place, recursively.
        if ($kind === 'array' || $kind === 'object') {
            /** @noinspection ForeachSourceInspection */
            foreach ($var as &$element) {
                $element = self::filter($element, $normalization_form, $leading_combining);
            }
            unset($element);

            return $var;
        }

        // Every other non-string type is returned untouched.
        if ($kind !== 'string') {
            return $var;
        }

        if (\strpos($var, "\r") !== false) {
            // Workaround https://bugs.php.net/65732
            $var = self::normalize_line_ending($var);
        }

        // Pure ASCII needs neither normalization nor re-encoding.
        if (ASCII::is_ascii($var) !== false) {
            return $var;
        }

        if (\Normalizer::isNormalized($var, $normalization_form)) {
            // Non-empty placeholder: marks "already normalized" for the isset() check below.
            $normalized = '-';
        } else {
            $normalized = \Normalizer::normalize($var, $normalization_form);

            // An empty / failed normalization result triggers a forced UTF-8 re-encode instead.
            $var = isset($normalized[0])
                ? $normalized
                : self::encode('UTF-8', $var, true);
        }

        $startsWithCombiningMark = $var[0] >= "\x80"
            && isset($normalized[0], $leading_combining[0])
            && \preg_match('/^\\p{Mn}/u', $var);

        if ($startsWithCombiningMark) {
            // Prevent leading combining chars
            // for NFC-safe concatenations.
            $var = $leading_combining . $var;
        }

        return $var;
    }
1627
1628
    /**
1629
     * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1630
     *
1631
     * Gets a specific external variable by name and optionally filters it
1632
     *
1633
     * @see http://php.net/manual/en/function.filter-input.php
1634
     *
1635
     * @param int    $type          <p>
1636
     *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1637
     *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1638
     *                              <b>INPUT_ENV</b>.
1639
     *                              </p>
1640
     * @param string $variable_name <p>
1641
     *                              Name of a variable to get.
1642
     *                              </p>
1643
     * @param int    $filter        [optional] <p>
1644
     *                              The ID of the filter to apply. The
1645
     *                              manual page lists the available filters.
1646
     *                              </p>
1647
     * @param mixed  $options       [optional] <p>
1648
     *                              Associative array of options or bitwise disjunction of flags. If filter
1649
     *                              accepts options, flags can be provided in "flags" field of array.
1650
     *                              </p>
1651
     *
1652
     * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
1653
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1654
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1655
     */
1656
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // Forward $options only when the caller really passed it, so the
        // native function keeps its own default otherwise.
        $raw = \func_num_args() >= 4
            ? \filter_input($type, $variable_name, $filter, $options)
            : \filter_input($type, $variable_name, $filter);

        // The native result (value, false or null) is run through self::filter().
        return self::filter($raw);
    }
1670
1671
    /**
1672
     * "filter_input_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1673
     *
1674
     * Gets external variables and optionally filters them
1675
     *
1676
     * @see http://php.net/manual/en/function.filter-input-array.php
1677
     *
1678
     * @param int   $type       <p>
1679
     *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1680
     *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1681
     *                          <b>INPUT_ENV</b>.
1682
     *                          </p>
1683
     * @param mixed $definition [optional] <p>
1684
     *                          An array defining the arguments. A valid key is a string
1685
     *                          containing a variable name and a valid value is either a filter type, or an array
1686
     *                          optionally specifying the filter, flags and options. If the value is an
1687
     *                          array, valid keys are filter which specifies the
1688
     *                          filter type,
1689
     *                          flags which specifies any flags that apply to the
1690
     *                          filter, and options which specifies any options that
1691
     *                          apply to the filter. See the example below for a better understanding.
1692
     *                          </p>
1693
     *                          <p>
1694
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1695
     *                          input array are filtered by this filter.
1696
     *                          </p>
1697
     * @param bool  $add_empty  [optional] <p>
1698
     *                          Add missing keys as <b>NULL</b> to the return value.
1699
     *                          </p>
1700
     *
1701
     * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1702
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1703
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
1704
     *               is not set and <b>NULL</b> if the filter fails.
1705
     */
1706
    public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
    {
        // With only $type given, call the native function with its own defaults;
        // otherwise forward the definition and the "add empty" switch as passed.
        $raw = \func_num_args() >= 2
            ? \filter_input_array($type, $definition, $add_empty)
            : \filter_input_array($type);

        // self::filter() walks arrays recursively, so every value gets filtered.
        return self::filter($raw);
    }
1716
1717
    /**
1718
     * "filter_var()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1719
     *
1720
     * Filters a variable with a specified filter
1721
     *
1722
     * @see http://php.net/manual/en/function.filter-var.php
1723
     *
1724
     * @param mixed $variable <p>
1725
     *                        Value to filter.
1726
     *                        </p>
1727
     * @param int   $filter   [optional] <p>
1728
     *                        The ID of the filter to apply. The
1729
     *                        manual page lists the available filters.
1730
     *                        </p>
1731
     * @param mixed $options  [optional] <p>
1732
     *                        Associative array of options or bitwise disjunction of flags. If filter
1733
     *                        accepts options, flags can be provided in "flags" field of array. For
1734
     *                        the "callback" filter, callable type should be passed. The
1735
     *                        callback must accept one argument, the value to be filtered, and return
1736
     *                        the value after filtering/sanitizing it.
1737
     *                        </p>
1738
     *                        <p>
1739
     *                        <code>
1740
     *                        // for filters that accept options, use this format
1741
     *                        $options = array(
1742
     *                        'options' => array(
1743
     *                        'default' => 3, // value to return if the filter fails
1744
     *                        // other options here
1745
     *                        'min_range' => 0
1746
     *                        ),
1747
     *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1748
     *                        );
1749
     *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1750
     *                        // for filter that only accept flags, you can pass them directly
1751
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1752
     *                        // for filter that only accept flags, you can also pass as an array
1753
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1754
     *                        array('flags' => FILTER_NULL_ON_FAILURE));
1755
     *                        // callback validate filter
1756
     *                        function foo($value)
1757
     *                        {
1758
     *                        // Expected format: Surname, GivenNames
1759
     *                        if (strpos($value, ", ") === false) return false;
1760
     *                        list($surname, $givennames) = explode(", ", $value, 2);
1761
     *                        $empty = (empty($surname) || empty($givennames));
1762
     *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1763
     *                        if ($empty || $notstrings) {
1764
     *                        return false;
1765
     *                        } else {
1766
     *                        return $value;
1767
     *                        }
1768
     *                        }
1769
     *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1770
     *                        </code>
1771
     *                        </p>
1772
     *
1773
     * @return mixed the filtered data, or <b>FALSE</b> if the filter fails
1774
     */
1775 2
    public static function filter_var($variable, int $filter = \FILTER_DEFAULT, $options = null)
    {
        // Forward $options only when the caller really passed a third argument.
        $filtered = \func_num_args() >= 3
            ? \filter_var($variable, $filter, $options)
            : \filter_var($variable, $filter);

        // The native result is then run through self::filter().
        return self::filter($filtered);
    }
1785
1786
    /**
1787
     * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1788
     *
1789
     * Gets multiple variables and optionally filters them
1790
     *
1791
     * @see http://php.net/manual/en/function.filter-var-array.php
1792
     *
1793
     * @param array $data       <p>
1794
     *                          An array with string keys containing the data to filter.
1795
     *                          </p>
1796
     * @param mixed $definition [optional] <p>
1797
     *                          An array defining the arguments. A valid key is a string
1798
     *                          containing a variable name and a valid value is either a
1799
     *                          filter type, or an
1800
     *                          array optionally specifying the filter, flags and options.
1801
     *                          If the value is an array, valid keys are filter
1802
     *                          which specifies the filter type,
1803
     *                          flags which specifies any flags that apply to the
1804
     *                          filter, and options which specifies any options that
1805
     *                          apply to the filter. See the example below for a better understanding.
1806
     *                          </p>
1807
     *                          <p>
1808
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1809
     *                          input array are filtered by this filter.
1810
     *                          </p>
1811
     * @param bool  $add_empty  [optional] <p>
1812
     *                          Add missing keys as <b>NULL</b> to the return value.
1813
     *                          </p>
1814
     *
1815
     * @return mixed an array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1816
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1817
     *               set
1818
     */
1819 2
    public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
    {
        // With only $data given, call the native function with its own defaults;
        // otherwise forward the definition and the "add empty" switch as passed.
        $filtered = \func_num_args() >= 2
            ? \filter_var_array($data, $definition, $add_empty)
            : \filter_var_array($data);

        // self::filter() walks arrays recursively, so every value gets filtered.
        return self::filter($filtered);
    }
1829
1830
    /**
1831
     * Checks whether finfo is available on the server.
1832
     *
1833
     * @return bool
1834
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
1835
     */
1836
    public static function finfo_loaded(): bool
    {
        // Availability is decided solely by the presence of the "finfo" class.
        $finfoAvailable = \class_exists('finfo');

        return $finfoAvailable;
    }
1840
1841
    /**
1842
     * Returns the first $n characters of the string.
1843
     *
1844
     * @param string $str      <p>The input string.</p>
1845
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
1846
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1847
     *
1848
     * @return string
1849
     */
1850 13
    public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        // Nothing to take from an empty string or for a non-positive count.
        $nothingToTake = $n <= 0 || $str === '';
        if ($nothingToTake) {
            return '';
        }

        // "UTF-8" goes straight to mbstring; every other charset
        // is handled by self::substr().
        $head = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $head;
    }
1862
1863
    /**
1864
     * Check if the number of Unicode characters is not more than the specified integer.
1865
     *
1866
     * @param string $str      the original string to be checked
1867
     * @param int    $box_size the size in number of chars to be checked against string
1868
     *
1869
     * @return bool true if string is less than or equal to $box_size, false otherwise
1870
     */
1871 2
    public static function fits_inside(string $str, int $box_size): bool
    {
        // Length as reported by self::strlen().
        $length = self::strlen($str);

        return $length <= $box_size;
    }
1875
1876
    /**
1877
     * Try to fix simple broken UTF-8 strings.
1878
     *
1879
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1880
     *
1881
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1882
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1883
     * See: http://en.wikipedia.org/wiki/Windows-1252
1884
     *
1885
     * @param string $str <p>The input string</p>
1886
     *
1887
     * @return string
1888
     */
1889 46
    public static function fix_simple_utf8(string $str): string
    {
        // Search / replace lists derived from the fix table, built once and reused.
        static $brokenSequences = null;
        static $fixedSequences = null;

        if ($str === '') {
            return '';
        }

        if ($brokenSequences === null) {
            // Load the shared "broken => fixed" table on first use.
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $brokenSequences = \array_keys(self::$BROKEN_UTF8_FIX);
            $fixedSequences = \array_values(self::$BROKEN_UTF8_FIX);
        }

        return \str_replace($brokenSequences, $fixedSequences, $str);
    }
1909
1910
    /**
1911
     * Fix a double (or multiple) encoded UTF8 string.
1912
     *
1913
     * @param string|string[] $str you can use a string or an array of strings
1914
     *
1915
     * @return string|string[]
1916
     *                         Will return the fixed input-"array" or
1917
     *                         the fixed input-"string"
1918
     *
1919
     * @psalm-suppress InvalidReturnType
1920
     */
1921 2
    public static function fix_utf8($str)
    {
        if (\is_array($str) !== true) {
            $current = (string) $str;

            // Decode + re-encode until the string no longer changes;
            // an empty string never enters the loop.
            $previous = '';
            while ($previous !== $current) {
                $previous = $current;
                /**
                 * @psalm-suppress PossiblyInvalidArgument
                 */
                $current = self::to_utf8(
                    self::utf8_decode($current, true)
                );
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $current;
        }

        // Arrays: fix every entry in place (recursively), keys are kept.
        foreach ($str as &$entry) {
            $entry = self::fix_utf8($entry);
        }
        unset($entry);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $str;
    }
1952
1953
    /**
1954
     * Get the text direction ('RTL' or 'LTR') of a specific character.
1955
     *
1956
     * @param string $char
1957
     *
1958
     * @return string 'RTL' or 'LTR'
1959
     */
1960 2
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlDirection = \IntlChar::charDirection($char);

            // direction codes from the "IntlChar"-Class
            $ltrCodes = [0, 11, 12, 20];
            $rtlCodes = [1, 13, 14, 15, 21];

            if (\in_array($intlDirection, $ltrCodes, true)) {
                return 'LTR';
            }

            if (\in_array($intlDirection, $rtlCodes, true)) {
                return 'RTL';
            }

            // Any other code falls through to the code point table below.
        }

        $c = static::chr_to_decimal($char);

        // Everything outside the overall span covered by the table is LTR.
        if (!($c >= 0x5be && $c <= 0x10b7f)) {
            return 'LTR';
        }

        // Individual RTL code points (matched by identity).
        $rtlCodePoints = [
            0x5be, 0x5c0, 0x5c3, 0x5c6,
            0x608, 0x60b, 0x60d, 0x61b,
            0x710, 0x7b1, 0x7fa,
            0x81a, 0x824, 0x828, 0x85e,
            0x200f,
            0xfb1d, 0xfb3e,
            0x10808, 0x1083c, 0x1093f, 0x10a00,
        ];

        if (\in_array($c, $rtlCodePoints, true)) {
            return 'RTL';
        }

        // Inclusive RTL code point ranges: [first, last].
        $rtlRanges = [
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            [0x800, 0x815],
            [0x830, 0x83e],
            [0x840, 0x858],
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            [0x10b78, 0x10b7f],
        ];

        foreach ($rtlRanges as $range) {
            if ($c >= $range[0] && $c <= $range[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2066
2067
    /**
2068
     * Check for php-support.
2069
     *
2070
     * @param string|null $key
2071
     *
2072
     * @return mixed
2073
     *               Return the full support-"array", if $key === null<br>
2074
     *               return bool-value, if $key is used and available<br>
2075
     *               otherwise return <strong>null</strong>
2076
     */
2077 27
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            // Load the transliterator list on first keyed lookup.
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            // compatibility fix for old versions
            self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

            // Unknown keys yield null.
            return self::$SUPPORT[$key] ?? null;
        }

        // No key: hand back the complete support array as it currently is.
        return self::$SUPPORT;
    }
2091
2092
    /**
2093
     * Warning: this method only works for some file-types (png, jpg)
2094
     *          if you need more supported types, please use e.g. "finfo"
2095
     *
2096
     * @param string $str
2097
     * @param array  $fallback with these keys: 'ext', 'mime', 'type'
2098
     *
2099
     * @return array
2100
     *               with these keys: 'ext', 'mime', 'type'
2101
     */
2102 39
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        // The signature lookup needs the first two bytes.
        if (\strlen($str) < 2) {
            return $fallback;
        }

        // Lookup key: decimal values of byte 1 and byte 2 written back to back,
        // e.g. 0xFF 0xD8 -> "255" . "216" -> 255216.
        $type_code = (int) (\ord($str[0]) . \ord($str[1]));

        //
        // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
        //
        // WARNING: do not add too simple comparisons, because of false-positive results:
        //
        // 3780 => 'pdf', 7790 => 'exe', 7784 => 'midi', 8075 => 'zip',
        // 8297 => 'rar', 7173 => 'gif', 7373 => 'tiff' 6677 => 'bmp', ...
        //
        $knownTypes = [
            255216 => [
                'ext'  => 'jpg',
                'mime' => 'image/jpeg',
                'type' => 'binary',
            ],
            13780 => [
                'ext'  => 'png',
                'mime' => 'image/png',
                'type' => 'binary',
            ],
        ];

        return $knownTypes[$type_code] ?? $fallback;
    }
2163
2164
    /**
2165
     * @param int    $length        <p>Length of the random string.</p>
2166
     * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
2167
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
2168
     *
2169
     * @return string
2170
     */
2171 1
    public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
    {
        // The code path is chosen on the encoding exactly as passed in;
        // normalization only happens on the non-"UTF-8" path.
        $useMbDirectly = $encoding === 'UTF-8';

        if ($useMbDirectly) {
            $poolSize = (int) \mb_strlen($possibleChars);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $poolSize = (int) self::strlen($possibleChars, $encoding);
        }

        // No characters to choose from.
        if ($poolSize === 0) {
            return '';
        }

        //
        // add random chars
        //

        $result = '';
        $picked = 0;
        while ($picked < $length) {
            try {
                $position = \random_int(0, $poolSize - 1);
            } catch (\Exception $e) {
                // Best-effort fallback when random_int() throws.
                /** @noinspection RandomApiMigrationInspection */
                $position = \mt_rand(0, $poolSize - 1);
            }

            $char = $useMbDirectly
                ? \mb_substr($possibleChars, $position, 1)
                : self::substr($possibleChars, $position, 1, $encoding);

            // A failed extraction does not count as a picked character.
            if ($char !== false) {
                $result .= $char;
                ++$picked;
            }
        }

        return $result;
    }
2225
2226
    /**
2227
     * @param int|string $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
2228
     * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
2229
     *
2230
     * @return string
2231
     */
2232 1
    public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
    {
        // Seed = random number + session id + remote / server address (if set)
        // + the caller's extra entropy, concatenated in that order.
        $seed = (string) \random_int(0, \mt_getrandmax());
        $seed .= \session_id();
        $seed .= ($_SERVER['REMOTE_ADDR'] ?? '');
        $seed .= ($_SERVER['SERVER_ADDR'] ?? '');
        $seed .= $entropyExtra;

        // The seed is the uniqid() prefix; "more entropy" is switched on.
        $unique = \uniqid($seed, true);

        // Optionally fold everything into an md5 hash.
        return $md5 ? \md5($unique . $seed) : $unique;
    }
2248
2249
    /**
     * Deprecated alias: forwards the call to "UTF8::string_has_bom()".
     *
     * @param string $str <p>The string that is passed through unchanged.</p>
     *
     * @return bool
     *              <p>The result of "UTF8::string_has_bom()".</p>
     *
     * @see UTF8::string_has_bom()
     * @deprecated <p>please use "UTF8::string_has_bom()"</p>
     */
    public static function hasBom(string $str): bool
    {
        $hasBom = self::string_has_bom($str);

        return $hasBom;
    }
2263
2264
    /**
     * Check if the string contains at least one lower case character.
     *
     * The check uses mb_ereg_match() when mbstring support is flagged as available,
     * otherwise "UTF8::str_matches_pattern()" is used with the same pattern.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not the string contains a lower case character.</p>
     */
    public static function has_lowercase(string $str): bool
    {
        $pattern = '.*[[:lower:]]';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2280
2281
    /**
     * Check if the string contains at least one upper case character.
     *
     * The check uses mb_ereg_match() when mbstring support is flagged as available,
     * otherwise "UTF8::str_matches_pattern()" is used with the same pattern.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not the string contains an upper case character.</p>
     */
    public static function has_uppercase(string $str): bool
    {
        $pattern = '.*[[:upper:]]';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2297
2298
    /**
     * Convert a hexadecimal value into a character: the value is turned into
     * a number via hexdec() and then handed over to "UTF8::decimal_to_chr()".
     *
     * @param string $hexdec <p>The hexadecimal value.</p>
     *
     * @return false|string
     *                      <p>One single UTF-8 character.</p>
     */
    public static function hex_to_chr(string $hexdec)
    {
        $decimal = \hexdec($hexdec);

        return self::decimal_to_chr($decimal);
    }
2309
2310
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * Accepted input: 4 to 6 hexadecimal digits, optionally prefixed with "U+" or "\u",
     * e.g. "U+00a7", "\u00a7" or "00a7".
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * @param string $hexDec <p>The hexadecimal code point representation.</p>
     *
     * @return false|int
     *                   <p>The code point, or false on failure (empty or non-hexadecimal input).</p>
     */
    public static function hex_to_int($hexDec)
    {
        // init
        $hexDec = (string) $hexDec;

        if ($hexDec === '') {
            return false;
        }

        // only real hexadecimal digits are allowed, otherwise e.g. "zzzz" would
        // pass the check and intval() would silently convert it into 0
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexDec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2334
2335
    /**
     * Alias: forwards all arguments unchanged to "UTF8::html_entity_decode()".
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $flags    [optional] <p>Passed through, null is allowed.</p>
     * @param string $encoding [optional] <p>Passed through, default: "UTF-8"</p>
     *
     * @return string
     *                <p>The result of "UTF8::html_entity_decode()".</p>
     *
     * @see UTF8::html_entity_decode()
     */
    public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2350
2351
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * With mbstring support the conversion is done via mb_encode_numericentity(),
     * otherwise every character is converted via "UTF8::single_chr_html_encode()".
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>HTML numbered entities.</p>
     */
    public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // start at 0x80 if the ASCII range should stay untouched
            $startCode = 0x00;
            if ($keepAsciiChars === true) {
                $startCode = 0x80;
            }

            // a convmap consists of groups of four integers: [start, end, offset, mask],
            // a map with another length is rejected by PHP >= 8.0
            $convmap = [$startCode, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                // no explicit encoding here: the internal mbstring encoding is used
                return \mb_encode_numericentity(
                    $str,
                    $convmap
                );
            }

            return \mb_encode_numericentity(
                $str,
                $convmap,
                $encoding
            );
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keepAsciiChars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keepAsciiChars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2407
2408
    /**
     * UTF-8 version of html_entity_decode()
     *
     * The reason we are not using html_entity_decode() by itself is because
     * while it is not technically correct to leave out the semicolon
     * at the end of an entity most browsers will still interpret the entity
     * correctly. html_entity_decode() does not convert entities without
     * semicolons, so the missing semicolon is added to numeric entities first.
     *
     * Convert all HTML entities to their applicable characters. The decoding is
     * repeated until the string does not change anymore, so nested entities
     * (e.g. "&amp;amp;") are decoded completely.
     *
     * INFO: opposite to UTF8::html_encode()
     *
     * @see http://php.net/manual/en/function.html-entity-decode.php
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $flags    [optional] <p>
     *                         A bitmask of one or more of the following flags, which specify how to handle quotes
     *                         and which document type to use. If null is given, ENT_QUOTES | ENT_HTML5 is used.
     *                         <ul>
     *                         <li><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes alone.</li>
     *                         <li><b>ENT_QUOTES</b>: Will convert both double and single quotes.</li>
     *                         <li><b>ENT_NOQUOTES</b>: Will leave both double and single quotes unconverted.</li>
     *                         <li><b>ENT_HTML401</b>: Handle code as HTML 4.01.</li>
     *                         <li><b>ENT_XML1</b>: Handle code as XML 1.</li>
     *                         <li><b>ENT_XHTML</b>: Handle code as XHTML.</li>
     *                         <li><b>ENT_HTML5</b>: Handle code as HTML 5.</li>
     *                         </ul>
     *                         </p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The decoded string.</p>
     */
    public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        if (
            !isset($str[3]) // examples: &; || &x;
            ||
            \strpos($str, '&') === false // no "&"
        ) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // a convmap consists of groups of four integers: [start, end, offset, mask],
        // a map with another length is rejected by PHP >= 8.0
        $convmap = [0x80, 0xfffff, 0, 0xfffff];

        do {
            // remember the current state, the loop ends when a pass changes nothing
            $str_compare = $str;

            // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
            if (self::$SUPPORT['mbstring'] === true) {
                if ($encoding === 'UTF-8') {
                    // no explicit encoding here: the internal mbstring encoding is used
                    $str = \mb_decode_numericentity(
                        $str,
                        $convmap
                    );
                } else {
                    $str = \mb_decode_numericentity(
                        $str,
                        $convmap,
                        $encoding
                    );
                }
            } else {
                $str = (string) \preg_replace_callback(
                    "/&#\d{2,6};/",
                    /**
                     * @param string[] $matches
                     *
                     * @return string
                     */
                    static function (array $matches) use ($encoding): string {
                        $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');
                        // quotes are left encoded here, they are handled by html_entity_decode() + $flags below
                        if ($returnTmp !== '"' && $returnTmp !== "'") {
                            return $returnTmp;
                        }

                        return $matches[0];
                    },
                    $str
                );
            }

            if (\strpos($str, '&') !== false) {
                if (\strpos($str, '&#') !== false) {
                    // decode also numeric & UTF16 two byte entities
                    // (add the missing ";" at the end of numeric entities)
                    $str = (string) \preg_replace(
                        '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                        '$1;',
                        $str
                    );
                }

                $str = \html_entity_decode(
                    $str,
                    $flags,
                    $encoding
                );
            }
        } while ($str_compare !== $str);

        return $str;
    }
2564
2565
    /**
     * Escape a string for HTML output: calls "UTF8::htmlspecialchars()" with the
     * fixed flags ENT_QUOTES | ENT_SUBSTITUTE.
     *
     * @param string $str      <p>The string being escaped.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The escaped string.</p>
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        $escapeFlags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $escapeFlags, $encoding);
    }
2581
2582
    /**
     * Remove empty html-tags: an opening tag that is directly followed
     * (only whitespace in between) by a closing tag.
     *
     * e.g.: <tag></tag>
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *                <p>The string without the matched empty tags.</p>
     */
    public static function html_stripe_empty_tags(string $str): string
    {
        $emptyTagPattern = '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u';

        $withoutEmptyTags = \preg_replace($emptyTagPattern, '', $str);

        return (string) $withoutEmptyTags;
    }
2599
2600
    /**
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
     *
     * Steps: native htmlentities(), then every backslash is replaced by "&#92;" and
     * finally the result is passed to "UTF8::html_encode()" (ASCII chars are kept).
     *
     * @see http://php.net/manual/en/function.htmlentities.php
     *
     * @param string $str           <p>The input string.</p>
     * @param int    $flags         [optional] <p>
     *                              A bitmask of one or more of the following flags, which specify how to handle
     *                              quotes, invalid code unit sequences and the used document type.
     *                              The default is ENT_COMPAT.
     *                              <ul>
     *                              <li><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes
     *                              alone.</li>
     *                              <li><b>ENT_QUOTES</b>: Will convert both double and single quotes.</li>
     *                              <li><b>ENT_NOQUOTES</b>: Will leave both double and single quotes
     *                              unconverted.</li>
     *                              <li><b>ENT_IGNORE</b>: Silently discard invalid code unit sequences instead of
     *                              returning an empty string. Using this flag is discouraged as it may have
     *                              security implications.</li>
     *                              <li><b>ENT_SUBSTITUTE</b>: Replace invalid code unit sequences with a Unicode
     *                              Replacement Character U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of
     *                              returning an empty string.</li>
     *                              <li><b>ENT_DISALLOWED</b>: Replace invalid code points for the given document
     *                              type with a Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
     *                              (otherwise) instead of leaving them as is. This may be useful, for instance,
     *                              to ensure the well-formedness of XML documents with embedded external
     *                              content.</li>
     *                              <li><b>ENT_HTML401</b>: Handle code as HTML 4.01.</li>
     *                              <li><b>ENT_XML1</b>: Handle code as XML 1.</li>
     *                              <li><b>ENT_XHTML</b>: Handle code as XHTML.</li>
     *                              <li><b>ENT_HTML5</b>: Handle code as HTML 5.</li>
     *                              </ul>
     *                              </p>
     * @param string $encoding      [optional] <p>
     *                              Defines the encoding used in the conversion. Although this argument is
     *                              technically optional, you are highly encouraged to specify the correct
     *                              value for your code.
     *                              </p>
     * @param bool   $double_encode [optional] <p>
     *                              When <i>double_encode</i> is turned off PHP will not
     *                              encode existing html entities. The default is to convert everything.
     *                              </p>
     *
     * @return string
     *                <p>
     *                The encoded string.
     *                <br><br>
     *                If the input <i>string</i> contains an invalid code unit
     *                sequence within the given <i>encoding</i> an empty string
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
     *                <b>ENT_SUBSTITUTE</b> flags are set.
     *                </p>
     */
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $entities = \htmlentities($str, $flags, $encoding, $double_encode);

        /**
         * PHP does not convert a backslash into its html entity, because the backslash
         * is mostly used to escape characters before inserting them into a database.
         * That kind of escaping is not needed here, so the backslash is replaced
         * by its html entity equivalent.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $entities = \str_replace('\\', '&#92;', $entities);

        return self::html_encode($entities, true, $encoding);
    }
2729
2730
    /**
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
     *
     * The encoding is normalized first (unless it is already "UTF-8" or "CP850"),
     * afterwards the native htmlspecialchars() is called.
     *
     * INFO: Take a look at "UTF8::htmlentities()"
     *
     * @see http://php.net/manual/en/function.htmlspecialchars.php
     *
     * @param string $str           <p>The string being converted.</p>
     * @param int    $flags         [optional] <p>
     *                              A bitmask of one or more of the following flags, which specify how to handle
     *                              quotes, invalid code unit sequences and the used document type.
     *                              The default is ENT_COMPAT.
     *                              <ul>
     *                              <li><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes
     *                              alone.</li>
     *                              <li><b>ENT_QUOTES</b>: Will convert both double and single quotes.</li>
     *                              <li><b>ENT_NOQUOTES</b>: Will leave both double and single quotes
     *                              unconverted.</li>
     *                              <li><b>ENT_IGNORE</b>: Silently discard invalid code unit sequences instead of
     *                              returning an empty string. Using this flag is discouraged as it may have
     *                              security implications.</li>
     *                              <li><b>ENT_SUBSTITUTE</b>: Replace invalid code unit sequences with a Unicode
     *                              Replacement Character U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of
     *                              returning an empty string.</li>
     *                              <li><b>ENT_DISALLOWED</b>: Replace invalid code points for the given document
     *                              type with a Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
     *                              (otherwise) instead of leaving them as is. This may be useful, for instance,
     *                              to ensure the well-formedness of XML documents with embedded external
     *                              content.</li>
     *                              <li><b>ENT_HTML401</b>: Handle code as HTML 4.01.</li>
     *                              <li><b>ENT_XML1</b>: Handle code as XML 1.</li>
     *                              <li><b>ENT_XHTML</b>: Handle code as XHTML.</li>
     *                              <li><b>ENT_HTML5</b>: Handle code as HTML 5.</li>
     *                              </ul>
     *                              </p>
     * @param string $encoding      [optional] <p>
     *                              Defines encoding used in conversion.
     *                              </p>
     *                              <p>
     *                              For the purposes of this function, the encodings
     *                              ISO-8859-1, ISO-8859-15, UTF-8, cp866, cp1251, cp1252, and
     *                              KOI8-R are effectively equivalent, provided the
     *                              <i>string</i> itself is valid for the encoding, as
     *                              the characters affected by <b>htmlspecialchars</b> occupy
     *                              the same positions in all of these encodings.
     *                              </p>
     * @param bool   $double_encode [optional] <p>
     *                              When <i>double_encode</i> is turned off PHP will not
     *                              encode existing html entities, the default is to convert everything.
     *                              </p>
     *
     * @return string
     *                <p>
     *                The converted string.
     *                <br><br>
     *                If the input <i>string</i> contains an invalid code unit
     *                sequence within the given <i>encoding</i> an empty string
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
     *                <b>ENT_SUBSTITUTE</b> flags are set.
     *                </p>
     */
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $converted;
    }
2852
2853
    /**
     * Check if the "iconv" extension is loaded (via extension_loaded()).
     *
     * @return bool
     *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise
     */
    public static function iconv_loaded(): bool
    {
        $isLoaded = \extension_loaded('iconv');

        return $isLoaded;
    }
2863
2864
    /**
     * Alias: forwards the value unchanged to "UTF8::decimal_to_chr()".
     *
     * @param mixed $int <p>The value that is passed through.</p>
     *
     * @return string
     *                <p>The result of "UTF8::decimal_to_chr()".</p>
     *
     * @see UTF8::decimal_to_chr()
     */
    public static function int_to_chr($int): string
    {
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
2877
2878
    /**
     * Converts Integer to hexadecimal U+xxxx code point representation.
     *
     * The hexadecimal value (lower case, as returned by dechex()) is left-padded
     * with zeros to at least 4 digits and the prefix is prepended.
     *
     * INFO: opposite to UTF8::hex_to_int()
     *
     * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
     * @param string $pfix [optional] <p>The prefix of the result, default: "U+"</p>
     *
     * @return string
     *                <p>The code point representation, e.g. "U+00a7".</p>
     */
    public static function int_to_hex(int $int, string $pfix = 'U+'): string
    {
        $paddedHex = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $pfix . $paddedHex;
    }
2896
2897
    /**
     * Check if the "IntlChar" class exists (via class_exists()).
     *
     * @return bool
     *              <strong>true</strong> if the class exists, <strong>false</strong> otherwise
     */
    public static function intlChar_loaded(): bool
    {
        $classExists = \class_exists('IntlChar');

        return $classExists;
    }
2907
2908
    /**
     * Check if the "intl" extension is loaded (via extension_loaded()).
     *
     * @return bool
     *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise
     */
    public static function intl_loaded(): bool
    {
        $isLoaded = \extension_loaded('intl');

        return $isLoaded;
    }
2918
2919
    /**
     * Deprecated alias for "UTF8::is_ascii()", the call is forwarded to "ASCII::is_ascii()".
     *
     * @param string $str <p>The string that is passed through unchanged.</p>
     *
     * @return bool
     *              <p>The result of "ASCII::is_ascii()".</p>
     *
     * @see UTF8::is_ascii()
     * @deprecated <p>please use "UTF8::is_ascii()"</p>
     */
    public static function isAscii(string $str): bool
    {
        $isAscii = ASCII::is_ascii($str);

        return $isAscii;
    }
2933
2934
    /**
     * Deprecated alias: forwards the call to "UTF8::is_base64()".
     *
     * @param string $str <p>The value that is passed through unchanged.</p>
     *
     * @return bool
     *              <p>The result of "UTF8::is_base64()".</p>
     *
     * @see UTF8::is_base64()
     * @deprecated <p>please use "UTF8::is_base64()"</p>
     */
    public static function isBase64($str): bool
    {
        $isBase64 = self::is_base64($str);

        return $isBase64;
    }
2948
2949
    /**
     * Deprecated camelCase alias for "UTF8::is_binary()".
     *
     * @param mixed $str    <p>The input that is handed over unchanged.</p>
     * @param bool  $strict <p>Forwarded as the second argument.</p>
     *
     * @return bool
     *              The result of "UTF8::is_binary()"
     *
     * @see UTF8::is_binary()
     * @deprecated <p>please use "UTF8::is_binary()"</p>
     */
    public static function isBinary($str, $strict = false): bool
    {
        $isBinary = self::is_binary($str, $strict);

        return $isBinary;
    }
2964
2965
    /**
     * Deprecated camelCase alias for "UTF8::is_bom()".
     *
     * @param string $utf8_chr <p>The input that is handed over unchanged.</p>
     *
     * @return bool
     *              The result of "UTF8::is_bom()"
     *
     * @see UTF8::is_bom()
     * @deprecated <p>please use "UTF8::is_bom()"</p>
     */
    public static function isBom(string $utf8_chr): bool
    {
        $isBom = self::is_bom($utf8_chr);

        return $isBom;
    }
2979
2980
    /**
     * Deprecated camelCase alias for "UTF8::is_html()".
     *
     * @param string $str <p>The input that is handed over unchanged.</p>
     *
     * @return bool
     *              The result of "UTF8::is_html()"
     *
     * @see UTF8::is_html()
     * @deprecated <p>please use "UTF8::is_html()"</p>
     */
    public static function isHtml(string $str): bool
    {
        $isHtml = self::is_html($str);

        return $isHtml;
    }
2994
2995
    /**
     * Deprecated camelCase alias for "UTF8::is_json()".
     *
     * @param string $str <p>The input that is handed over unchanged.</p>
     *
     * @return bool
     *              The result of "UTF8::is_json()" called with its default options
     *
     * @see UTF8::is_json()
     * @deprecated <p>please use "UTF8::is_json()"</p>
     */
    public static function isJson(string $str): bool
    {
        $isJson = self::is_json($str);

        return $isJson;
    }
3009
3010
    /**
     * Deprecated camelCase alias for "UTF8::is_utf16()".
     *
     * @param mixed $str <p>The input that is handed over unchanged.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     *
     * @see UTF8::is_utf16()
     * @deprecated <p>please use "UTF8::is_utf16()"</p>
     */
    public static function isUtf16($str)
    {
        $utf16Type = self::is_utf16($str);

        return $utf16Type;
    }
3027
3028
    /**
     * Deprecated camelCase alias for "UTF8::is_utf32()".
     *
     * @param mixed $str <p>The input that is handed over unchanged.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     *
     * @see UTF8::is_utf32()
     * @deprecated <p>please use "UTF8::is_utf32()"</p>
     */
    public static function isUtf32($str)
    {
        $utf32Type = self::is_utf32($str);

        return $utf32Type;
    }
3045
3046
    /**
     * Deprecated camelCase alias for "UTF8::is_utf8()".
     *
     * @param string $str    <p>The input that is handed over unchanged.</p>
     * @param bool   $strict <p>Forwarded as the second argument.</p>
     *
     * @return bool
     *              The result of "UTF8::is_utf8()"
     *
     * @see UTF8::is_utf8()
     * @deprecated <p>please use "UTF8::is_utf8()"</p>
     */
    public static function isUtf8($str, $strict = false): bool
    {
        $isUtf8 = self::is_utf8($str, $strict);

        return $isUtf8;
    }
3061
3062
    /**
     * Tests the string against the POSIX class "[:alpha:]".
     *
     * The pattern uses the "*" quantifier, so it also accepts a string without any characters.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only alphabetic chars
     */
    public static function is_alpha(string $str): bool
    {
        $pattern = '^[[:alpha:]]*$';

        // without mbstring the check is delegated to the pattern helper of this class
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3079
3080
    /**
     * Tests the string against the POSIX class "[:alnum:]".
     *
     * The pattern uses the "*" quantifier, so it also accepts a string without any characters.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only alphanumeric chars
     */
    public static function is_alphanumeric(string $str): bool
    {
        $pattern = '^[[:alnum:]]*$';

        // without mbstring the check is delegated to the pattern helper of this class
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3097
3098
    /**
     * Checks if a string is 7 bit ASCII by delegating to "ASCII::is_ascii()".
     *
     * @param string $str <p>The string to check.</p>
     *
     * @return bool
     *              <strong>true</strong> if it is ASCII<br>
     *              <strong>false</strong> otherwise
     */
    public static function is_ascii(string $str): bool
    {
        $isAscii = ASCII::is_ascii($str);

        return $isAscii;
    }
3111
3112
    /**
     * Tells whether the input is a base64 encoded string.
     *
     * The input counts as base64 when strict decoding succeeds and
     * encoding the decoded bytes again reproduces the input exactly.
     *
     * @param mixed|string $str                <p>The input string.</p>
     * @param bool         $emptyStringIsValid [optional] <p>Is an empty string valid base64 or not?</p>
     *
     * @return bool whether or not $str is base64 encoded
     */
    public static function is_base64($str, $emptyStringIsValid = false): bool
    {
        if ($str === '' && $emptyStringIsValid === false) {
            return false;
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (!\is_string($str)) {
            return false;
        }

        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // round trip: only canonical base64 survives decode + encode unchanged
        return \base64_encode($decoded) === $str;
    }
3137
3138
    /**
     * Heuristic check whether the input looks like binary data.
     *
     * The input is cast to string and then run through these checks in order:
     * empty string (not binary), only "0"/"1" characters, the result of
     * "UTF8::get_file_type()", the share of NUL bytes and, in strict mode only,
     * the MIME encoding reported by ext-fileinfo.
     *
     * @param mixed $input  <p>The value to inspect.</p>
     * @param bool  $strict <p>Additionally ask ext-fileinfo for the MIME encoding.</p>
     *
     * @throws \RuntimeException if $strict is true and ext-fileinfo is not available
     *
     * @return bool
     */
    public static function is_binary($input, bool $strict = false): bool
    {
        $input = (string) $input;

        if ($input === '') {
            return false;
        }

        // a string that consists only of "0" and "1" is treated as binary
        if (\preg_match('~^[01]+$~', $input)) {
            return true;
        }

        $fileType = self::get_file_type($input);
        if ($fileType['type'] === 'binary') {
            return true;
        }

        // more than a quarter of NUL bytes is treated as binary
        $byteCount = \strlen($input);
        $nullByteCount = \substr_count($input, "\x0", 0, $byteCount);
        if ($nullByteCount / $byteCount > 0.25) {
            return true;
        }

        if ($strict !== true) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $detectedEncoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

        return $detectedEncoding === 'binary';
    }
3182
3183
    /**
     * Check if the file is binary.
     *
     * Reads up to the first 512 bytes of the file and passes them to
     * "UTF8::is_binary()" in strict mode.
     *
     * @param string $file <p>Path handed to \fopen().</p>
     *
     * @return bool
     *              <strong>false</strong> if nothing could be read, otherwise the result of the strict binary check
     */
    public static function is_binary_file($file): bool
    {
        $chunk = '';

        $handle = \fopen($file, 'rb');
        if (\is_resource($handle)) {
            $chunk = \fread($handle, 512);
            \fclose($handle);
        }

        return $chunk === '' ? false : self::is_binary($chunk, true);
    }
3207
3208
    /**
     * Tests the string against the POSIX class "[:space:]".
     *
     * The pattern uses the "*" quantifier, so it also accepts a string without any characters.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only whitespace characters
     */
    public static function is_blank(string $str): bool
    {
        $pattern = '^[[:space:]]*$';

        // without mbstring the check is delegated to the pattern helper of this class
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3225
3226
    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * The input is compared strictly (===) against every key of self::$BOM,
     * so the whole input must be a BOM; a BOM followed by other bytes is not a match.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise
     */
    public static function is_bom($str): bool
    {
        // Only the keys are needed. Looking them up by value avoids the former
        // by-reference iteration over the shared static array.
        return \in_array($str, \array_keys(self::$BOM), true);
    }
3247
3248
    /**
     * Determine whether the given value is considered to be empty.
     *
     * The value is empty if it evaluates to FALSE when converted to bool,
     * e.g. "", "0", 0, null, false or an empty array.
     *
     * @param mixed $str
     *
     * @return bool whether or not $str is empty
     */
    public static function is_empty($str): bool
    {
        // for an always-defined parameter, empty($str) is the same as a negated bool conversion
        return !$str;
    }
3262
3263
    /**
     * Tests the string against the POSIX class "[:xdigit:]".
     *
     * The pattern uses the "*" quantifier, so it also accepts a string without any characters.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only hexadecimal chars
     */
    public static function is_hexadecimal(string $str): bool
    {
        $pattern = '^[[:xdigit:]]*$';

        // without mbstring the check is delegated to the pattern helper of this class
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3280
3281
    /**
     * Check if the string contains at least one html-tag, e.g. "<lall>".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if an opening, closing or self-closing tag was found,
     *              <strong>false</strong> otherwise (also for an empty string)
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // hack for emoji support :/
        $encodedStr = self::emoji_encode($str);

        $tagMatches = [];
        \preg_match(
            "/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u",
            $encodedStr,
            $tagMatches
        );

        return $tagMatches !== [];
    }
3303
3304
    /**
     * Try to check if "$str" is a json-string.
     *
     * The string is decoded via "UTF8::json_decode()". A NULL result is only
     * accepted when the input itself is the literal "null" (case-insensitive).
     *
     * @param string $str                              <p>The input string.</p>
     * @param bool   $onlyArrayOrObjectResultsAreValid [optional] <p>Only array and objects are valid json results.</p>
     *
     * @throws \RuntimeException if ext-json is not available
     *
     * @return bool
     */
    public static function is_json(string $str, $onlyArrayOrObjectResultsAreValid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // NULL means "could not decode" unless the input really was "null"
        if ($decoded === null && \strtoupper($str) !== 'NULL') {
            return false;
        }

        if ($onlyArrayOrObjectResultsAreValid === true) {
            $isContainer = \is_object($decoded) || \is_array($decoded);
            if (!$isContainer) {
                return false;
            }
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3340
3341
    /**
     * Tests the string against the POSIX class "[:lower:]".
     *
     * The pattern uses the "*" quantifier, so it also accepts a string without any characters.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only lower case characters
     */
    public static function is_lowercase(string $str): bool
    {
        $pattern = '^[[:lower:]]*$';

        // without mbstring the check is delegated to the pattern helper of this class
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3355
3356
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The check actually unserializes the input. Objects are never
     * instantiated while doing so ("allowed_classes" is disabled), so
     * probing an untrusted string cannot trigger magic methods of
     * application classes.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not $str is serialized
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // serialized "false" unserializes to false and would look like a failure below
        if ($str === 'b:0;') {
            return true;
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3375
3376
    /**
     * Tests the string against the POSIX class "[:upper:]".
     *
     * The pattern uses the "*" quantifier, so it also accepts a string without any characters.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only upper case characters
     */
    public static function is_uppercase(string $str): bool
    {
        $pattern = '^[[:upper:]]*$';

        // without mbstring the check is delegated to the pattern helper of this class
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3394
3395
    /**
     * Check if the string is UTF-16.
     *
     * For both byte orders the input (without BOM) is converted to UTF-8 and
     * round-tripped back. If the round trip is stable, the chars of the
     * converted string are scored against the char statistic of the input.
     * The byte order with the higher score wins; equal scores mean "not UTF-16".
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary <p>Return false right away if "UTF8::is_binary()" (strict) says no.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // one score per byte order, evaluated LE first and BE second
        $score = [
            'UTF-16LE' => 0,
            'UTF-16BE' => 0,
        ];

        foreach (['UTF-16LE', 'UTF-16BE'] as $encoding) {
            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            // the statistic of the input is only computed once and only if needed
            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true, false);
            }

            // Only the keys are of interest. Iterating by value, because the
            // result of a function call cannot be used as a reference target.
            foreach (self::count_chars($test3) as $test3char => $unusedCount) {
                if (\in_array($test3char, $strChars, true) === true) {
                    ++$score[$encoding];
                }
            }
        }

        if ($score['UTF-16BE'] === $score['UTF-16LE']) {
            return false;
        }

        return $score['UTF-16LE'] > $score['UTF-16BE'] ? 1 : 2;
    }
3472
3473
    /**
     * Check if the string is UTF-32.
     *
     * For both byte orders the input (without BOM) is converted to UTF-8 and
     * round-tripped back. If the round trip is stable, the chars of the
     * converted string are scored against the char statistic of the input.
     * The byte order with the higher score wins; equal scores mean "not UTF-32".
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary <p>Return false right away if "UTF8::is_binary()" (strict) says no.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // one score per byte order, evaluated LE first and BE second
        $score = [
            'UTF-32LE' => 0,
            'UTF-32BE' => 0,
        ];

        foreach (['UTF-32LE', 'UTF-32BE'] as $encoding) {
            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            // the statistic of the input is only computed once and only if needed
            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true, false);
            }

            // Only the keys are of interest. Iterating by value, because the
            // result of a function call cannot be used as a reference target.
            foreach (self::count_chars($test3) as $test3char => $unusedCount) {
                if (\in_array($test3char, $strChars, true) === true) {
                    ++$score[$encoding];
                }
            }
        }

        if ($score['UTF-32BE'] === $score['UTF-32LE']) {
            return false;
        }

        return $score['UTF-32LE'] > $score['UTF-32BE'] ? 1 : 2;
    }
3550
3551
    /**
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
     *
     * Arrays are checked element by element (recursively); the first element
     * that fails makes the whole check fail. Any other input is cast to string
     * and handed to "UTF8::is_utf8_string()".
     *
     * @param int|string|string[]|null $str    <p>The input to be checked.</p>
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str) === true) {
            // iterate by value: the elements are only read, never modified
            foreach ($str as $v) {
                if (self::is_utf8($v, $strict) === false) {
                    return false;
                }
            }

            return true;
        }

        return self::is_utf8_string((string) $str, $strict);
    }
3573
3574
    /**
     * Decodes a JSON string after passing it through "UTF8::filter()".
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>
     *                        The <i>json</i> string being decoded.
     *                        </p>
     *                        <p>
     *                        The native decoder only works with UTF-8 encoded strings.
     *                        </p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>
     *                        User specified recursion depth.
     *                        </p>
     * @param int    $options [optional] <p>
     *                        Bitmask of JSON decode options, handed over unchanged to \json_decode().
     *                        </p>
     *
     * @throws \RuntimeException if ext-json is not available
     *
     * @return mixed
     *               The value encoded in <i>json</i> in appropriate PHP type.
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
     *               is deeper than the recursion limit.
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        $filteredJson = self::filter($json);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $decoded = \json_decode($filteredJson, $assoc, $depth, $options);

        return $decoded;
    }
3624
3625
    /**
     * Returns the JSON representation of a value after passing it through "UTF8::filter()".
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>
     *                       The <i>value</i> being encoded. Can be any type except
     *                       a resource.
     *                       </p>
     * @param int   $options [optional] <p>
     *                       Bitmask of JSON encode options (e.g. <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_SLASHES</b>, <b>JSON_UNESCAPED_UNICODE</b>),
     *                       handed over unchanged to \json_encode().
     *                       </p>
     * @param int   $depth   [optional] <p>
     *                       Set the maximum depth. Must be greater than zero.
     *                       </p>
     *
     * @throws \RuntimeException if ext-json is not available
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filteredValue = self::filter($value);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $encoded = \json_encode($filteredValue, $options, $depth);

        return $encoded;
    }
3674
3675
    /**
     * Reports whether the function "json_decode" exists on this server.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function json_loaded(): bool
    {
        $hasJson = \function_exists('json_decode');

        return $hasJson;
    }
3685
3686
    /**
     * Makes string's first char lowercase.
     *
     * For "UTF-8" without a language and without $tryToKeepStringLength the
     * native \mb_strtolower() is used for the first char; every other
     * combination goes through "UTF8::strtolower()".
     *
     * @param string      $str                   <p>The input string</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            $firstChar = (string) \mb_substr($str, 0, 1);
            $remainder = (string) \mb_substr($str, 1);

            // fast path: no language specific rules and no length constraint requested
            if ($lang === null && $tryToKeepStringLength === false) {
                return \mb_strtolower($firstChar) . $remainder;
            }
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $firstChar = (string) self::substr($str, 0, 1, $encoding);
            $remainder = (string) self::substr($str, 1, null, $encoding);
        }

        $loweredFirstChar = self::strtolower(
            $firstChar,
            $encoding,
            false,
            $lang,
            $tryToKeepStringLength
        );

        return $loweredFirstChar . $remainder;
    }
3742
3743
    /**
     * Alias of "UTF8::lcfirst()": every argument is forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     *
     * @see UTF8::lcfirst()
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $lowered = self::lcfirst(
            $str,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );

        return $lowered;
    }
3765
3766
    /**
     * Lowercases the first character of every word in the string.
     *
     * The string is split with "UTF8::str_to_words()", every non-excluded word is
     * passed to "UTF8::lcfirst()" and the pieces are glued back together.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string[]    $exceptions            [optional] <p>Words that must be left untouched (exact match).</p>
     * @param string      $charlist              [optional] <p>Additional chars that belong to words and do not start
     *                                           a new word.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // Compare against the empty string explicitly: a truthiness check would
        // also discard the perfectly valid input "0".
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        foreach ($words as &$word) {
            // Empty pieces (and "0", which has no lowercase form anyway) need no work.
            if (!$word) {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $word = self::lcfirst($word, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
            }
        }
        // Break the reference so later writes to $word cannot touch the array.
        unset($word);

        return \implode('', $words);
    }
3812
3813
    /**
     * Alias of "UTF8::lcfirst()": every argument is forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     *
     * @see UTF8::lcfirst()
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $lowered = self::lcfirst(
            $str,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );

        return $lowered;
    }
3835
3836
    /**
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars [optional] <p>Characters to be stripped; null or an empty string
     *                           means "strip whitespace".</p>
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Explicit comparison instead of a truthiness check: the char list "0" is
        // valid and must not silently fall back to whitespace trimming.
        if ($chars !== null && $chars !== '') {
            // Quote the list so it is taken literally inside the character class.
            $pattern = '^[' . \preg_quote($chars, '/') . ']+';
        } else {
            $pattern = '^[\\s]+';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // No mbstring: use the PCRE based helper with the same pattern.
        return self::regex_replace($str, $pattern, '', '', '/');
    }
3864
3865
    /**
     * Returns the character with the highest code point found in the given data.
     *
     * An array argument is concatenated into one string before it is inspected.
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @return string|null the character with the highest code point, or null if no code points were found
     */
    public static function max($arg)
    {
        $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input, false);
        if (\count($codepoints) === 0) {
            return null;
        }

        return self::chr(\max($codepoints));
    }
3887
3888
    /**
     * Returns the largest value reported by "UTF8::chr_size_list()" for the string,
     * i.e. the widest character measured in bytes.
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @return int the maximum byte length of a single character, 0 if the size list is empty
     */
    public static function max_chr_width(string $str): int
    {
        $sizes = self::chr_size_list($str);

        return \count($sizes) > 0 ? (int) \max($sizes) : 0;
    }
3905
3906
    /**
     * Tells whether the "mbstring" extension is loaded in the current PHP runtime.
     *
     * @return bool
     *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise
     */
    public static function mbstring_loaded(): bool
    {
        $isLoaded = \extension_loaded('mbstring');

        return $isLoaded;
    }
3916
3917
    /**
     * Returns the character with the lowest code point found in the given data.
     *
     * An array argument is concatenated into one string before it is inspected.
     *
     * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @return string|null the character with the lowest code point, or null if no code points were found
     */
    public static function min($arg)
    {
        $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input, false);
        if (\count($codepoints) === 0) {
            return null;
        }

        return self::chr(\min($codepoints));
    }
3939
3940
    /**
     * Alias of "UTF8::normalize_encoding()": both arguments are forwarded unchanged.
     *
     * @param mixed $encoding
     * @param mixed $fallback
     *
     * @return mixed
     *
     * @see UTF8::normalize_encoding()
     * @deprecated <p>please use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
3955
3956
    /**
     * Normalize the encoding-"name" input.
     *
     * Lookup order: fixed shortcuts, the per-request cache, the list of known
     * encodings, and finally a table of aliases that is matched against the
     * upper-cased name with all non-alphanumeric characters removed.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        // init
        $encoding = (string) $encoding;

        // Empty input, plus "0" / "1" which only show up for non "strict_types" usage.
        if (\in_array($encoding, ['', '0', '1'], true)) {
            return $fallback;
        }

        if (\in_array($encoding, ['UTF-8', 'UTF8'], true)) {
            return 'UTF-8';
        }

        if (\in_array($encoding, ['8BIT', 'BINARY'], true)) {
            return 'CP850';
        }

        if (\in_array($encoding, ['HTML', 'HTML-ENTITIES'], true)) {
            return 'HTML-ENTITIES';
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        // The list of known encoding names is loaded lazily, on first use.
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        $upperName = \strtoupper($encoding);
        // Alias key: the upper-cased name without separators, e.g. "ISO-8859-1" -> "ISO88591".
        $aliasKey = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $upperName);

        $equivalences = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // Unknown aliases fall back to the plain upper-cased input.
        $normalized = $equivalences[$aliasKey] ?? $upperName;

        // The cache is keyed by the name exactly as the caller passed it.
        $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $normalized;

        return $normalized;
    }
4099
4100
    /**
     * Converts Windows ("\r\n") and old Mac ("\r") line endings to the unix style "\n".
     *
     * @param string $str
     *
     * @return string
     */
    public static function normalize_line_ending(string $str): string
    {
        // "\r\n" is listed first so a CRLF pair becomes a single "\n".
        $foreignLineEndings = ["\r\n", "\r"];

        return \str_replace($foreignLineEndings, "\n", $str);
    }
4111
4112
    /**
     * Normalize some MS Word special characters.
     *
     * Thin wrapper: the work is done by "ASCII::normalize_msword()".
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @return string
     */
    public static function normalize_msword(string $str): string
    {
        $normalized = ASCII::normalize_msword($str);

        return $normalized;
    }
4123
4124
    /**
     * Normalize the whitespace.
     *
     * Thin wrapper: all arguments are forwarded to "ASCII::normalize_whitespace()".
     *
     * @param string $str                     <p>The string to be normalized.</p>
     * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                        bidirectional text chars.</p>
     *
     * @return string
     */
    public static function normalize_whitespace(
        string $str,
        bool $keepNonBreakingSpace = false,
        bool $keepBidiUnicodeControls = false
    ): string {
        $normalized = ASCII::normalize_whitespace($str, $keepNonBreakingSpace, $keepBidiUnicodeControls);

        return $normalized;
    }
4145
4146
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * Resolution order: per-request cache, the preloaded "ord" lookup table,
     * "IntlChar::ord()" (if available) and finally manual decoding of up to
     * four bytes.
     *
     * @param string $chr      <p>The character of which to calculate code point.<p/>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             Unicode code point of the given character,<br>
     *             0 on invalid UTF-8 byte sequence
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        static $CHAR_CACHE = [];

        // init
        $chr = (string) $chr;

        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // The key is built from the input as given, before any re-encoding below.
        $cacheKey = $chr . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            $CHAR_CACHE[$cacheKey] = self::$ORD[$chr];

            return $CHAR_CACHE[$cacheKey];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlCode = \IntlChar::ord($chr);
            // A falsy result (null / 0) is not trusted; the manual decoder decides instead.
            if ($intlCode) {
                $CHAR_CACHE[$cacheKey] = $intlCode;

                return $intlCode;
            }
        }

        //
        // fallback via vanilla php
        //

        // unpack('C*') yields a 1-based list of the (at most four) leading bytes.
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        /** @noinspection OffsetOperationsInspection */
        $lead = $bytes ? $bytes[1] : 0;

        /** @noinspection OffsetOperationsInspection */
        if ($lead >= 0xF0 && isset($bytes[4])) {
            // four-byte sequence
            /** @noinspection UnnecessaryCastingInspection */
            $code = (int) ((($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        } elseif ($lead >= 0xE0 && isset($bytes[3])) {
            // three-byte sequence
            /** @noinspection UnnecessaryCastingInspection */
            $code = (int) ((($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        } elseif ($lead >= 0xC0 && isset($bytes[2])) {
            // two-byte sequence
            /** @noinspection UnnecessaryCastingInspection */
            $code = (int) ((($lead - 0xC0) << 6) + $bytes[2] - 0x80);
        } else {
            // single byte (or nothing at all): the lead byte is the result
            $code = $lead;
        }

        return $CHAR_CACHE[$cacheKey] = $code;
    }
4231
4232
    /**
     * Parses a query string into an array that is handed back through the second parameter.
     *
     * WARNING: Unlike the native "parse_str()", this method never creates variables in the
     *          current scope; the result is only written into $result.
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str       <p>The input string.</p>
     * @param array  $result    <p>The result will be returned into this reference parameter.</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return bool
     *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result
     */
    public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
    {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // Without mbstring the native parser is used; it reports nothing, so only
        // the content of $result can signal success.
        if (self::$SUPPORT['mbstring'] !== true) {
            /** @noinspection PhpVoidFunctionResultUsedInspection */
            \parse_str($str, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($str, $result);

        return $parsed !== false && $result !== [];
    }
4264
4265
    /**
     * Checks if the "u" modifier, which enables Unicode support in PCRE, can be used.
     *
     * The check runs an empty pattern with the modifier against an empty string.
     *
     * @return bool
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     */
    public static function pcre_utf8_support(): bool
    {
        // Errors are silenced on purpose: an unsupported modifier must yield false, not a warning.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $matched = @\preg_match('//u', '');

        return (bool) $matched;
    }
4277
4278
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * Both boundaries are resolved to code points first (decimal digits, hexadecimal
     * digits, plain numbers or the code point of a character), then every code point
     * in the resulting range is converted back into a character.
     *
     * @param mixed     $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param mixed     $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     * @param bool      $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple "is_numeric"</p>
     * @param string    $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param float|int $step      [optional] <p>
     *                             If a step value is given, it will be used as the
     *                             increment between elements in the sequence. step
     *                             should be given as a positive number. If not specified,
     *                             step will default to 1.
     *                             </p>
     *
     * @throws \InvalidArgumentException if $step is not numeric or not positive
     * @throws \RuntimeException         if $use_ctype is requested but ext-ctype is missing
     *
     * @return string[] an empty array if a boundary is empty or resolves to code point 0
     */
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        $is_digit = false;
        $is_xdigit = false;

        // The ctype functions must always receive strings: an int argument is
        // interpreted as an ASCII code (and is deprecated since PHP 8.1).
        $var1String = (string) $var1;
        $var2String = (string) $var2;

        /** @noinspection PhpComposerExtensionStubsInspection */
        if ($use_ctype && \ctype_digit($var1String) && \ctype_digit($var2String)) {
            $is_digit = true;
            $start = (int) $var1;
        } /** @noinspection PhpComposerExtensionStubsInspection */ elseif ($use_ctype && \ctype_xdigit($var1String) && \ctype_xdigit($var2String)) {
            $is_xdigit = true;
            $start = (int) self::hex_to_int($var1String);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            $start = self::ord($var1);
        }

        if (!$start) {
            return [];
        }

        // The end boundary is interpreted the same way the start boundary was.
        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int($var2String);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord($var2);
        }

        if (!$end) {
            return [];
        }

        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
4360
4361
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                    => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @return string
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // Only strings containing "&", "%", "+" or a literal "\u" go through the decode loop.
        $needsDecoding = \strpos($str, '&') !== false
            || \strpos($str, '%') !== false
            || \strpos($str, '+') !== false
            || \strpos($str, '\u') !== false;

        if (!$needsDecoding) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // Decode at least once; with $multi_decode keep going until a pass changes nothing.
        do {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $entitiesDecoded = self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            );
            $str = self::fix_simple_utf8(\rawurldecode($entitiesDecoded));
        } while ($multi_decode === true && $previous !== $str);

        return $str;
    }
4418
4419
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is wrapped in the delimiter and always gets the "u" (UTF-8) modifier;
     * $options are appended after it.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern, without delimiters.</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used. "msr" is treated as "ms".</p>
     * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // "msr" is mapped to "ms" before the modifiers are handed to PCRE.
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback
        $boundary = $delimiter ?: '/';

        $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
4452
4453
    /**
     * Alias of "UTF8::remove_bom()": the argument is forwarded unchanged.
     *
     * @param string $str
     *
     * @return string
     *
     * @see UTF8::remove_bom()
     * @deprecated <p>please use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $withoutBom = self::remove_bom($str);

        return $withoutBom;
    }
4467
4468
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of the BOM table (BOM bytes => byte length) is tested against
     * the start of the string, in table order, and cut off when it matches.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string without UTF-BOM
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remainingBytes = \strlen($str);
        foreach (self::$BOM as $bom => $bomLength) {
            // Only a BOM at the very beginning counts.
            if (\strpos($str, $bom, 0) !== 0) {
                continue;
            }

            $stripped = \substr($str, $bomLength, $remainingBytes);
            if ($stripped === false) {
                // substr() signals failure: nothing is left behind the BOM.
                return '';
            }

            $remainingBytes -= (int) $bomLength;
            $str = (string) $stripped;
        }

        return $str;
    }
4497
4498
    /**
     * Collapses consecutive repetitions of a string inside another string into one occurrence.
     *
     * e.g.: remove_duplicates('a  b   c') => 'a b c'
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String(s) whose consecutive repetitions are collapsed. Default: ' '</p>
     *
     * @return string the result string with removed duplicates
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what) === true) {
            $what = [$what];
        }

        // Anything that is neither a string nor an array is ignored.
        if (\is_array($what) === true) {
            /** @noinspection ForeachSourceInspection */
            foreach ($what as $item) {
                // A callback returns the needle verbatim. Passing it as a plain
                // replacement string would expand "$0", "\1", "${1}" ... inside
                // the needle as back-references.
                $str = (string) \preg_replace_callback(
                    '/(' . \preg_quote($item, '/') . ')+/u',
                    static function () use ($item) {
                        return $item;
                    },
                    $str
                );
            }
        }

        return $str;
    }
4521
4522
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str
     * @param string $allowableTags [optional] <p>Tags which should not be stripped, e.g. '&lt;b&gt;&lt;i&gt;'.
     *                              Default: '' (strip every tag)
     *                              </p>
     *
     * @return string
     */
    public static function remove_html(string $str, string $allowableTags = ''): string
    {
        $withoutTags = \strip_tags($str, $allowableTags);

        return $withoutTags;
    }
4536
4537
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * A "\r\n" pair counts as ONE break, so it is replaced by a single $replacement.
     *
     * @param string $str
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @return string
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // The pattern used to start with a stray "/" ("#/\r\n|..."), which removed a
        // slash in front of a CRLF and made a plain CRLF match twice (as "\r" and "\n").
        // Flags: i = case-insensitive, s = dot matches newlines, U = ungreedy, so
        // "<br.*>" stops at the first ">" after "<br".
        return (string) \preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
    }
4549
4550
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * copy&past from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * Thin wrapper: all arguments are forwarded to "ASCII::remove_invisible_characters()".
     *
     * @param string $str
     * @param bool   $url_encoded
     * @param string $replacement
     *
     * @return string
     */
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = true,
        string $replacement = ''
    ): string {
        $cleaned = ASCII::remove_invisible_characters($str, $url_encoded, $replacement);

        return $cleaned;
    }
4574
4575
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string without the prefix $substring
     */
    public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // Compare against '' explicitly: a truthiness check would treat the prefix "0" as "no prefix".
        // "strncmp()" only inspects the first bytes instead of searching the whole string.
        if ($substring === '' || \strncmp($str, $substring, \strlen($substring)) !== 0) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, (int) \mb_strlen($substring));
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
4606
4607
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string having a $str without the suffix $substring
     */
    public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // Compare against '' explicitly: a truthiness check would treat the suffix "0" as "no suffix".
        if ($substring === '' || \substr($str, -\strlen($substring)) !== $substring) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
4639
4640
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $search        <p>The needle to search for.</p>
     * @param string $replacement   <p>The string to replace with.</p>
     * @param bool   $caseSensitive [optional] <p>true uses "str_replace()", false uses "UTF8::str_ireplace()".
     *                              Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $caseSensitive = true
    ): string {
        return $caseSensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4662
4663
    /**
     * Replaces all occurrences of every element of $search in $str by $replacement.
     *
     * @param string       $str           <p>The input string.</p>
     * @param array        $search        <p>The elements to search for.</p>
     * @param array|string $replacement   <p>The string (or list of strings) to replace with.</p>
     * @param bool         $caseSensitive [optional] <p>true uses "str_replace()", false uses "UTF8::str_ireplace()".
     *                                    Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $caseSensitive = true
    ): string {
        if ($caseSensitive === false) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
4685
4686
    /**
     * Replace the diamond question mark (U+FFFD) and invalid-UTF8 chars with the replacement.
     *
     * @param string $str                <p>The input string</p>
     * @param string $replacementChar    [optional] <p>The replacement; '' removes the characters. Default: ''</p>
     * @param bool   $processInvalidUtf8 [optional] <p>true also replaces invalid UTF-8 byte sequences, false only
     *                                   touches U+FFFD characters that are already in the string. Default: true</p>
     *
     * @return string
     */
    public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
    {
        if ($str === '') {
            return '';
        }

        if ($processInvalidUtf8 === true) {
            // "mb_substitute_character()" accepts a code point or a keyword such as "none", never
            // the replacement text itself. Invalid sequences are therefore mapped to U+FFFD first
            // and exchanged for the real replacement by the "str_replace()" below.
            $replacementCharHelper = $replacementChar === '' ? 'none' : 0xFFFD;

            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            $save = \mb_substitute_character();

            try {
                \mb_substitute_character($replacementCharHelper);
                // the polyfill maybe return false, so cast to string
                $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            } finally {
                // the substitute character is global state: always restore the previous value
                \mb_substitute_character($save);
            }
        }

        // "\xEF\xBF\xBD" is the UTF-8 byte sequence of U+FFFD
        return \str_replace("\xEF\xBF\xBD", $replacementChar, $str);
    }
4732
4733
    /**
     * Strip whitespace or other characters from end of a UTF-8 string.
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars [optional] <p>Characters to be stripped; null or '' strips whitespace.</p>
     *
     * @return string the string with unwanted characters stripped from the right
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Explicit comparison: a truthiness check would ignore the char list "0"
        // and strip whitespace instead.
        if ($chars !== null && $chars !== '') {
            $pattern = '[' . \preg_quote($chars, '/') . ']+$';
        } else {
            $pattern = '[\\s]+$';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // without "mbstring" the same pattern is handed to the regex helper of this class
        return self::regex_replace($str, $pattern, '', '', '/');
    }
4761
4762
    /**
     * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
     *
     * Echoes every entry of the internal support table as "key - value" inside a "<pre>" block.
     *
     * @psalm-suppress MissingReturnType
     */
    public static function showSupport()
    {
        echo '<pre>';
        // read-only dump: iterate by value, no reference into the shared static table is needed
        foreach (self::$SUPPORT as $key => $value) {
            echo $key . ' - ' . \print_r($value, true) . "\n<br>";
        }
        echo '</pre>';
    }
4776
4777
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keepAsciiChars [optional] <p>Set to <strong>true</strong> to return ASCII chars unchanged.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the HTML numbered entity, or '' for an empty input
     */
    public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($char === '') {
            return '';
        }

        // the ASCII check only runs when the caller asked to keep ASCII chars
        $returnUnchanged = $keepAsciiChars === true && ASCII::is_ascii($char) === true;
        if ($returnUnchanged) {
            return $char;
        }

        $codePoint = self::ord($char, $encoding);

        return '&#' . $codePoint . ';';
    }
4802
4803
    /**
     * Replace every run of $tabLength spaces with one tab character.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength [optional] <p>Number of spaces that make up one tab. Values below 1 leave the
     *                          string unchanged. Default: 4</p>
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tabLength = 4): string
    {
        // "str_repeat()" rejects negative counts and an empty search string replaces nothing,
        // so there is nothing to do for non-positive lengths.
        if ($tabLength < 1) {
            return $str;
        }

        return \str_replace(\str_repeat(' ', $tabLength), "\t", $str);
    }
4821
4822
    /**
     * alias for "UTF8::str_split()"
     *
     * All arguments are forwarded unchanged.
     *
     * @param string|string[] $str
     * @param int             $length
     * @param bool            $cleanUtf8
     *
     * @return string[]
     *
     * @see UTF8::str_split()
     */
    public static function split(
        $str,
        int $length = 1,
        bool $cleanUtf8 = false
    ): array {
        $chunks = self::str_split($str, $length, $cleanUtf8);

        return $chunks;
    }
4840
4841
    /**
     * alias for "UTF8::str_starts_with()"
     *
     * @param string $haystack
     * @param string $needle
     *
     * @return bool
     *
     * @see UTF8::str_starts_with()
     */
    public static function str_begins(string $haystack, string $needle): bool
    {
        $startsWithNeedle = self::str_starts_with($haystack, $needle);

        return $startsWithNeedle;
    }
4855
4856
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // 'UTF-8' and 'CP850' are used as given, every other name is normalized first
        $needsNormalization = $encoding !== 'UTF-8' && $encoding !== 'CP850';
        if ($needsNormalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $lowerFirst = self::lcfirst(\trim($str), $encoding, false, $lang, $tryToKeepStringLength);
        $withoutLeadingSeparators = (string) \preg_replace('/^[-_]+/', '', $lowerFirst);

        // the plain "mb_strtoupper()" is only used when no language / length handling was requested
        $useMbFunction = $lang === null && $tryToKeepStringLength === false;

        // step 1: drop every run of separators and upper-case the character that follows it
        $withoutSeparators = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($useMbFunction, $encoding, $lang, $tryToKeepStringLength): string {
                // separators at the very end have no following character
                if (!isset($match[1])) {
                    return '';
                }

                if ($useMbFunction !== true) {
                    return self::strtoupper($match[1], $encoding, false, $lang, $tryToKeepStringLength);
                }

                return $encoding === 'UTF-8'
                    ? \mb_strtoupper($match[1])
                    : \mb_strtoupper($match[1], $encoding);
            },
            $withoutLeadingSeparators
        );

        // step 2: upper-case every run of numbers together with the character that follows it
        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($useMbFunction, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength): string {
                if ($useMbFunction !== true) {
                    return self::strtoupper($match[0], $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
                }

                return $encoding === 'UTF-8'
                    ? \mb_strtoupper($match[0])
                    : \mb_strtoupper($match[0], $encoding);
            },
            $withoutSeparators
        );
    }
4941
4942
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * The whitespace is collapsed first, then the name helper runs once with ' '
     * and once with '-' as delimiter.
     *
     * @param string $str
     *
     * @return string string with $str capitalized
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);
        $capitalizedWords = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($capitalizedWords, '-');
    }
4960
4961
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $caseSensitive to false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>Substring to look for.</p>
     * @param bool   $caseSensitive [optional] <p>true searches via "strpos()", false via "mb_stripos()".
     *                              Default: true</p>
     *
     * @return bool whether or not $haystack contains $needle
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $caseSensitive = true
    ): bool {
        $position = $caseSensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        // position 0 is a valid hit, only false means "not found"
        return $position !== false;
    }
4983
4984
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $caseSensitive to false.
     *
     * An empty $haystack, an empty $needles list or an empty needle always yields false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool whether or not $haystack contains every one of the $needles
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $caseSensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // compare against '' explicitly so that the needle "0" is still searched for
            if ($needle === '') {
                return false;
            }

            $position = $caseSensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            // one missing needle is enough to fail; a hit must NOT end the loop,
            // otherwise only the first needle would ever be checked
            if ($position === false) {
                return false;
            }
        }

        return true;
    }
5019
5020
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $caseSensitive to false.
     *
     * Empty needles are skipped; an empty $haystack or an empty $needles list yields false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool
     *              Whether or not $haystack contains at least one of the $needles
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $caseSensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // compare against '' explicitly so that the needle "0" is still searched for
            if ($needle === '') {
                continue;
            }

            $position = $caseSensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
5062
5063
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * Shortcut for "UTF8::str_delimit()" with '-' as delimiter.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $dashed = self::str_delimit($str, '-', $encoding);

        return $dashed;
    }
5077
5078
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $delimiter             <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                           tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                           ß</p>
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // with native "mbstring" the "mb_ereg" functions are used, otherwise PCRE with the "u" modifier
        $hasMbRegex = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // step 1: put a dash in front of every uppercase letter that is not at a word boundary
        if ($hasMbRegex) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $marked = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
        } else {
            $marked = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
        }

        // step 2: lowercase, via plain "mb_strtolower()" when no special handling was requested
        $plainLowercase = $lang === null && $tryToKeepStringLength === false && $encoding === 'UTF-8';
        if ($plainLowercase) {
            $lowered = \mb_strtolower($marked);
        } else {
            $lowered = self::strtolower($marked, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
        }

        // step 3: every run of dashes, underscores and whitespace becomes one delimiter
        if ($hasMbRegex) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $lowered);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $lowered);
    }
5129
5130
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * The checks run in this order: binary (UTF-32 / UTF-16), ASCII, UTF-8,
     * "mb_detect_encoding()" and finally an "iconv()" round-trip per known encoding.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     */
    public static function str_detect_encoding($str)
    {
        $input = (string) $str;

        //
        // 1.) binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        if (self::is_binary($input, true) === true) {
            $utf32Type = self::is_utf32($input, false);
            if ($utf32Type === 1) {
                return 'UTF-32LE';
            }
            if ($utf32Type === 2) {
                return 'UTF-32BE';
            }

            $utf16Type = self::is_utf16($input, false);
            if ($utf16Type === 1) {
                return 'UTF-16LE';
            }
            if ($utf16Type === 2) {
                return 'UTF-16BE';
            }

            // is binary but not "UTF-16" or "UTF-32"
            return false;
        }

        //
        // 2.) plain ASCII
        //

        if (ASCII::is_ascii($input) === true) {
            return 'ASCII';
        }

        //
        // 3.) UTF-8
        //

        if (self::is_utf8_string($input) === true) {
            return 'UTF-8';
        }

        //
        // 4.) "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            $candidates = [
                'ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3', 'ISO-8859-4', 'ISO-8859-5',
                'ISO-8859-6', 'ISO-8859-7', 'ISO-8859-8', 'ISO-8859-9', 'ISO-8859-10',
                'ISO-8859-13', 'ISO-8859-14', 'ISO-8859-15', 'ISO-8859-16',
                'WINDOWS-1251', 'WINDOWS-1252', 'WINDOWS-1254',
                'CP932', 'CP936', 'CP950', 'CP866', 'CP850',
                'CP51932', 'CP50220', 'CP50221', 'CP50222',
                'ISO-2022-JP', 'ISO-2022-KR',
                'JIS', 'JIS-ms',
                'EUC-CN', 'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($input, $candidates, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) "iconv()"
        //

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $roundTrip = (string) @\iconv($candidate, $candidate . '//IGNORE', $input);
            if ($roundTrip === $input) {
                return $candidate;
            }
        }

        return false;
    }
5251
5252
    /**
     * alias for "UTF8::str_ends_with()"
     *
     * @param string $haystack
     * @param string $needle
     *
     * @return bool
     *
     * @see UTF8::str_ends_with()
     */
    public static function str_ends(string $haystack, string $needle): bool
    {
        $endsWithNeedle = self::str_ends_with($haystack, $needle);

        return $endsWithNeedle;
    }
5266
5267
    /**
     * Check if the string ends with the given substring.
     *
     * An empty $needle always matches; an empty $haystack only matches an empty $needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // byte-wise comparison of the last "strlen($needle)" bytes
        $needleLength = \strlen($needle);
        $tail = \substr($haystack, -$needleLength);

        return $tail === $needle;
    }
5287
5288
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty substring matches every string, like in "UTF8::str_ends_with()"
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with one of the $substrings
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        foreach ($substrings as $substring) {
            // "substr($str, -0)" would return the whole string, so the empty
            // substring has to be handled before the suffix comparison
            if ($substring === '') {
                return true;
            }

            if (\substr($str, -\strlen($substring)) === $substring) {
                return true;
            }
        }

        return false;
    }
5312
5313
    /**
     * Ensures that the string begins with $substring. If it doesn't, it's
     * prepended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        // byte-wise prefix check, skipped for an empty $substring
        $alreadyPrefixed = $substring !== ''
            && \strncmp($str, $substring, \strlen($substring)) === 0;

        return $alreadyPrefixed ? $str : $substring . $str;
    }
5334
5335
    /**
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        // byte-wise suffix check; empty operands never count as "already present"
        $alreadySuffixed = $str !== ''
            && $substring !== ''
            && \substr($str, -\strlen($substring)) === $substring;

        if ($alreadySuffixed) {
            return $str;
        }

        return $str . $substring;
    }
5357
5358
    /**
     * Capitalizes the first word of the string, replaces underscores with
     * spaces, and strips '_id'.
     *
     * Every occurrence of '_id' is removed before the remaining underscores are replaced.
     *
     * @param string $str
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        $search = ['_id', '_'];
        $replace = ['', ' '];

        $spaced = \str_replace($search, $replace, $str);

        return self::ucfirst(\trim($spaced));
    }
5382
5383
    /**
     * alias for "UTF8::str_istarts_with()"
     *
     * @param string $haystack
     * @param string $needle
     *
     * @return bool
     *
     * @see UTF8::str_istarts_with()
     */
    public static function str_ibegins(string $haystack, string $needle): bool
    {
        $startsWithNeedle = self::str_istarts_with($haystack, $needle);

        return $startsWithNeedle;
    }
5397
5398
    /**
     * alias for "UTF8::str_iends_with()"
     *
     * @param string $haystack
     * @param string $needle
     *
     * @return bool
     *
     * @see UTF8::str_iends_with()
     */
    public static function str_iends(string $haystack, string $needle): bool
    {
        $endsWithNeedle = self::str_iends_with($haystack, $needle);

        return $endsWithNeedle;
    }
5412
5413
    /**
     * Check if the string ends with the given substring, case insensitive.
     *
     * An empty $needle always matches; an empty $haystack only matches an empty $needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // the tail is cut by byte length of the needle and then compared via "UTF8::strcasecmp()"
        $tail = \substr($haystack, -\strlen($needle));
        $comparison = self::strcasecmp($tail, $needle);

        return $comparison === 0;
    }
5433
5434
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with one of the $substrings
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        // the first matching candidate wins
        foreach ($substrings as $candidate) {
            if (self::str_iends_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
5458
5459
    /**
     * Returns the index of the first occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search.
     *
     * Thin wrapper that forwards all arguments to UTF8::stripos()
     * (the case-insensitive counterpart of UTF8::str_index_first()).
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     *
     * @see UTF8::stripos()
     */
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::stripos($str, $needle, $offset, $encoding);
    }
5485
5486
    /**
     * Returns the index of the last occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. Offsets may be negative to count from the last character
     * in the string.
     *
     * Thin wrapper that forwards all arguments to UTF8::strripos()
     * (the case-insensitive counterpart of UTF8::str_index_last()).
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     *
     * @see UTF8::strripos()
     */
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strripos($str, $needle, $offset, $encoding);
    }
5513
5514
    /**
     * Returns the index of the first occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search.
     *
     * Thin wrapper that forwards all arguments to UTF8::strpos().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     *
     * @see UTF8::strpos()
     */
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strpos($str, $needle, $offset, $encoding);
    }
5540
5541
    /**
     * Returns the index of the last occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. Offsets may be negative to count from the last character
     * in the string.
     *
     * Thin wrapper that forwards all arguments to UTF8::strrpos().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     *
     * @see UTF8::strrpos()
     */
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strrpos($str, $needle, $offset, $encoding);
    }
5568
5569
    /**
     * Inserts $substring into the string at the $index provided.
     *
     * If $index is greater than the character length of $str, the input
     * string is returned unchanged.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $length = (int) self::strlen($str, $encoding);
            if ($index > $length) {
                return $str;
            }

            $head = (string) self::substr($str, 0, $index, $encoding);
            $tail = (string) self::substr($str, $index, $length, $encoding);

            return $head . $substring . $tail;
        }

        // fast path for the default encoding
        $length = (int) \mb_strlen($str);
        if ($index > $length) {
            return $str;
        }

        $head = (string) \mb_substr($str, 0, $index);
        $tail = (string) \mb_substr($str, $index, $length);

        return $head . $substring . $tail;
    }
5608
5609
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * Every search term is quoted and turned into a "/…/ui" regular expression,
     * the actual work is done by <function>preg_replace</function>. An empty
     * search term is mapped to a pattern that can never match.
     *
     * @see http://php.net/manual/en/function.str-ireplace.php
     *
     * @param mixed $search  <p>
     *                       Every replacement with search array is
     *                       performed on the result of previous replacement.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value(s), passed on to preg_replace().
     *                       </p>
     * @param mixed $subject <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] <p>
     *                       The number of matched and replaced needles will
     *                       be returned in count which is passed by
     *                       reference.
     *                       </p>
     *
     * @return mixed a string or an array of replacements
     *               (whatever preg_replace() returns, i.e. null on a PCRE error)
     */
    public static function str_ireplace($search, $replace, $subject, &$count = null)
    {
        $patterns = [];
        foreach ((array) $search as $key => $needle) {
            $needle = (string) $needle;

            // '/^(?<=.)$/' can never match, so an empty needle replaces nothing.
            $patterns[$key] = $needle === ''
                ? '/^(?<=.)$/'
                : '/' . \preg_quote($needle, '/') . '/ui';
        }

        // Use a dedicated counter instead of recycling $replace as the
        // by-reference output argument of preg_replace().
        $replaced = 0;
        $subject = \preg_replace($patterns, $replace, $subject, -1, $replaced);
        $count = $replaced;

        return $subject;
    }
5653
5654
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * - case-insensitive (also for non-ASCII characters)
     *
     * NOTE: an empty $search appends $replacement to $str; this long-standing
     * behaviour is kept as-is.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            return $str . $replacement;
        }

        if ($str === '') {
            return '';
        }

        // Byte-wise \stripos() only folds ASCII case, so e.g. "Ä" vs. "ä" would
        // never match; use the multibyte variant and cut by characters.
        if (\mb_stripos($str, $search, 0, 'UTF-8') === 0) {
            $searchLength = (int) \mb_strlen($search, 'UTF-8');

            return $replacement . \mb_substr($str, $searchLength, null, 'UTF-8');
        }

        return $str;
    }
5685
5686
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * - case-insensitive (also for non-ASCII characters)
     *
     * An empty $search appends $replacement to $str. A $search that is longer
     * than $str can never match, so $str is returned unchanged.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            return $str . $replacement;
        }

        if ($str === '') {
            return '';
        }

        $strLength = (int) \mb_strlen($str, 'UTF-8');
        $searchLength = (int) \mb_strlen($search, 'UTF-8');

        // Guard first: an offset computed from a longer $search would be out of
        // range (a ValueError since PHP 8, a warning before that).
        if ($searchLength > $strLength) {
            return $str;
        }

        $keepLength = $strLength - $searchLength;
        $tail = \mb_substr($str, $keepLength, null, 'UTF-8');

        // $tail has exactly as many characters as $search, so a hit at index 0
        // means the whole tail equals $search (ignoring case).
        if (\mb_stripos($tail, $search, 0, 'UTF-8') === 0) {
            return \mb_substr($str, 0, $keepLength, 'UTF-8') . $replacement;
        }

        return $str;
    }
5717
5718
    /**
     * Check if the string starts with the given substring, case insensitive.
     *
     * An empty $needle always matches (even against an empty $haystack);
     * a non-empty $needle never matches an empty $haystack.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // The needle is a prefix exactly when its first hit is at index 0.
        $firstHit = self::stripos($haystack, $needle);

        return $firstHit === 0;
    }
5738
5739
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with at least one of $substrings
     *              (always false for an empty $str or an empty list)
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // Iterate by value: the entries are only read, so a by-reference loop
        // would just force an array copy and leave a dangling reference behind.
        foreach ($substrings as $substring) {
            if (self::str_istarts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
5767
5768
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * - case-insensitive
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string the part behind the separator, or an empty string if
     *                $str / $separator is empty or the separator was not found
     */
    public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Forward $encoding so that the index is measured in the same charset
        // that is used for cutting the string below.
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
5802
5803
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * - case-insensitive
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string the part behind the separator, or an empty string if
     *                $str / $separator is empty or the separator was not found
     */
    public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Forward $encoding so that the index is measured in the same charset
        // that is used for cutting the string below.
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
5837
5838
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * - case-insensitive
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string the part in front of the separator, or an empty string if
     *                $str / $separator is empty or the separator was not found
     */
    public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Forward $encoding so that the index is measured in the same charset
        // that is used for cutting the string below.
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
5864
5865
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * - case-insensitive
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string the part in front of the separator, or an empty string if
     *                $str / $separator is empty or the separator was not found
     */
    public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $lastHit = self::strripos($str, $separator, 0, $encoding);

            return $lastHit === false
                ? ''
                : (string) self::substr($str, 0, $lastHit, $encoding);
        }

        // fast path for the default encoding
        $lastHit = \mb_strripos($str, $separator);

        return $lastHit === false
            ? ''
            : (string) \mb_substr($str, 0, $lastHit);
    }
5896
5897
    /**
     * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
     *
     * The lookup itself is delegated to UTF8::stristr(); a failed lookup
     * (false) is converted into an empty string.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::stristr($str, $needle, $beforeNeedle, $encoding);

        return $found === false ? '' : $found;
    }
5933
5934
    /**
     * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
     *
     * The lookup itself is delegated to UTF8::strrichr(); a failed lookup
     * (false) is converted into an empty string.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::strrichr($str, $needle, $beforeNeedle, $encoding);

        return $found === false ? '' : $found;
    }
5965
5966
    /**
     * Returns the last $n characters of the string.
     *
     * An empty $str or a non-positive $n yields an empty string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        if ($n <= 0 || $str === '') {
            return '';
        }

        $start = -$n;

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, $start, null, $encoding);
        }

        // fast path for the default encoding
        return (string) \mb_substr($str, $start);
    }
5989
5990
    /**
     * Limit the number of characters in a string.
     *
     * Strings that already fit into $length are returned unchanged. Longer
     * strings are cut so that the kept part plus $strAddOn is $length
     * characters long; if $strAddOn alone is at least $length characters long,
     * only $strAddOn is returned.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $strAddOn = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Never pass a negative length on: substr would then cut from the
            // END of the string and return more than $length characters.
            $keepLength = \max(0, $length - (int) self::strlen($strAddOn));

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $keepLength) . $strAddOn;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // Measure the add-on in the same charset as the string itself.
        $keepLength = \max(0, $length - (int) self::strlen($strAddOn, $encoding));

        return ((string) self::substr($str, 0, $keepLength, $encoding)) . $strAddOn;
    }
6027
6028
    /**
     * Limit the number of characters in a string, but also after the next word.
     *
     * Strings that already fit into $length are returned unchanged. Otherwise
     * the string is cut at $length and everything after the last space in that
     * cut is dropped before $strAddOn is appended; if nothing is left this way,
     * the first ($length - 1) characters are used instead.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $strAddOn = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($length <= 0 || $str === '') {
            return '';
        }

        $lastIndex = $length - 1;

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // the limit falls exactly onto a space -> cut right before it
            if (\mb_substr($str, $lastIndex, 1) === ' ') {
                return ((string) \mb_substr($str, 0, $lastIndex)) . $strAddOn;
            }

            $str = \mb_substr($str, 0, $length);

            // drop the (possibly truncated) last word
            $words = \explode(' ', $str);
            $new_str = \implode(' ', \array_slice($words, 0, -1));

            if ($new_str === '') {
                return ((string) \mb_substr($str, 0, $lastIndex)) . $strAddOn;
            }

            return $new_str . $strAddOn;
        }

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // the limit falls exactly onto a space -> cut right before it
        if (self::substr($str, $lastIndex, 1, $encoding) === ' ') {
            return ((string) self::substr($str, 0, $lastIndex, $encoding)) . $strAddOn;
        }

        $str = self::substr($str, 0, $length, $encoding);
        if ($str === false) {
            return '' . $strAddOn;
        }

        // drop the (possibly truncated) last word
        $words = \explode(' ', $str);
        $new_str = \implode(' ', \array_slice($words, 0, -1));

        if ($new_str === '') {
            return ((string) self::substr($str, 0, $lastIndex, $encoding)) . $strAddOn;
        }

        return $new_str . $strAddOn;
    }
6092
6093
    /**
     * Returns the longest common prefix between the string and $otherStr.
     *
     * Both strings are compared character by character from the start; the
     * comparison stops at the first difference.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        $prefix = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $limit = (int) \min(
                self::strlen($str, $encoding),
                self::strlen($otherStr, $encoding)
            );

            $position = 0;
            while ($position < $limit) {
                $current = self::substr($str, $position, 1, $encoding);
                if ($current === false) {
                    break;
                }

                if ($current !== self::substr($otherStr, $position, 1, $encoding)) {
                    break;
                }

                $prefix .= $current;
                ++$position;
            }

            return $prefix;
        }

        // fast path for the default encoding
        $limit = (int) \min(
            \mb_strlen($str),
            \mb_strlen($otherStr)
        );

        $position = 0;
        while ($position < $limit) {
            $current = \mb_substr($str, $position, 1);
            if ($current === false) {
                break;
            }

            if ($current !== \mb_substr($otherStr, $position, 1)) {
                break;
            }

            $prefix .= $current;
            ++$position;
        }

        return $prefix;
    }
6151
6152
    /**
     * Returns the longest common substring between the string and $otherStr.
     * In the case of ties, it returns that which occurs first.
     *
     * Uses dynamic programming, see
     * http://en.wikipedia.org/wiki/Longest_common_substring_problem
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the longest common substring, taken from $str
     *                (empty if one of the strings is empty or nothing is shared)
     */
    public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $otherStr === '') {
            return '';
        }

        $isUtf8 = $encoding === 'UTF-8';

        if ($isUtf8) {
            $strLength = (int) \mb_strlen($str);
            $otherLength = (int) \mb_strlen($otherStr);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $strLength = (int) self::strlen($str, $encoding);
            $otherLength = (int) self::strlen($otherStr, $encoding);
        }

        // Return if either string is empty
        if ($strLength === 0 || $otherLength === 0) {
            return '';
        }

        // Extract every character exactly once instead of re-cutting both
        // strings for each cell of the comparison table.
        $strChars = [];
        for ($i = 0; $i < $strLength; ++$i) {
            $strChars[$i] = $isUtf8
                ? \mb_substr($str, $i, 1)
                : self::substr($str, $i, 1, $encoding);
        }

        $otherChars = [];
        for ($j = 0; $j < $otherLength; ++$j) {
            $otherChars[$j] = $isUtf8
                ? \mb_substr($otherStr, $j, 1)
                : self::substr($otherStr, $j, 1, $encoding);
        }

        $len = 0; // length of the best match so far
        $end = 0; // (1-based) index in $str where the best match ends

        // Each table row only depends on the row before it, so two rows are
        // enough; index 0 is the "no characters yet" column and stays 0.
        $previousRow = \array_fill(0, $otherLength + 1, 0);

        for ($i = 1; $i <= $strLength; ++$i) {
            $strChar = $strChars[$i - 1];
            $currentRow = [0];

            for ($j = 1; $j <= $otherLength; ++$j) {
                if ($strChar === $otherChars[$j - 1]) {
                    $currentRow[$j] = $previousRow[$j - 1] + 1;

                    // strictly greater -> on ties the first occurrence wins
                    if ($currentRow[$j] > $len) {
                        $len = $currentRow[$j];
                        $end = $i;
                    }
                } else {
                    $currentRow[$j] = 0;
                }
            }

            $previousRow = $currentRow;
        }

        if ($isUtf8) {
            return (string) \mb_substr($str, $end - $len, $len);
        }

        return (string) self::substr($str, $end - $len, $len, $encoding);
    }
6236
6237
    /**
     * Returns the longest common suffix between the string and $otherStr.
     *
     * Both strings are compared character by character from the end; the
     * comparison stops at the first difference.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        if ($otherStr === '' || $str === '') {
            return '';
        }

        $suffix = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $limit = (int) \min(
                self::strlen($str, $encoding),
                self::strlen($otherStr, $encoding)
            );

            $fromEnd = 1;
            while ($fromEnd <= $limit) {
                $current = self::substr($str, -$fromEnd, 1, $encoding);
                if ($current === false) {
                    break;
                }

                if ($current !== self::substr($otherStr, -$fromEnd, 1, $encoding)) {
                    break;
                }

                $suffix = $current . $suffix;
                ++$fromEnd;
            }

            return $suffix;
        }

        // fast path for the default encoding
        $limit = (int) \min(
            \mb_strlen($str, $encoding),
            \mb_strlen($otherStr, $encoding)
        );

        $fromEnd = 1;
        while ($fromEnd <= $limit) {
            $current = \mb_substr($str, -$fromEnd, 1);
            if ($current === false) {
                break;
            }

            if ($current !== \mb_substr($otherStr, -$fromEnd, 1)) {
                break;
            }

            $suffix = $current . $suffix;
            ++$fromEnd;
        }

        return $suffix;
    }
6298
6299
    /**
     * Returns true if $str matches the supplied pattern, false otherwise.
     *
     * The pattern is wrapped into "/…/u" before it is handed to
     * <function>preg_match</function>, so it must be given without delimiters
     * and modifiers. A "/" inside the pattern may be written escaped or not.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against.</p>
     *
     * @return bool whether or not $str matches the pattern
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        // Escape every "/" that is not escaped yet (i.e. not preceded by an odd
        // number of backslashes); a bare "/" would end the delimiter too early.
        $escapedPattern = \preg_replace('~(?<!\\\\)((?:\\\\\\\\)*)/~', '$1\\\\/', $pattern);
        if ($escapedPattern === null) {
            // escaping failed -> fall back to the pattern as given
            $escapedPattern = $pattern;
        }

        return (bool) \preg_match('/' . $escapedPattern . '/u', $str);
    }
6311
6312
    /**
     * Returns whether or not a character exists at an index. Offsets may be
     * negative to count from the last character in the string. Implements
     * part of the ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool whether or not the index exists
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $charCount = (int) self::strlen($str, $encoding);

        // A negative offset -k addresses the k-th character from the end,
        // a non-negative one must be smaller than the number of characters.
        return $offset < 0
            ? $charCount >= \abs($offset)
            : $charCount > $offset;
    }
6334
6335
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string the character at the specified index
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // The bounds check has to measure the string in the same encoding that
        // char_at() reads it in, otherwise the check and the read can disagree.
        if (self::str_offset_exists($str, $index, $encoding) === false) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6364
6365
    /**
     * Pad a string to the given character length with another string.
     *
     * The input is returned unchanged when $pad_length is 0, when $pad_string is
     * empty, or when $pad_length is smaller than the character length of $str.
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of the string aliases
     *
     * @return string returns the padded string
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        // anything that is not an integer must be one of the string aliases
        if ($pad_type !== (int) $pad_type) {
            $aliases = [
                'left'  => \STR_PAD_LEFT,
                'right' => \STR_PAD_RIGHT,
                'both'  => \STR_PAD_BOTH,
            ];

            if (\is_string($pad_type) === false || isset($aliases[$pad_type]) === false) {
                throw new \InvalidArgumentException(
                    'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }

            $pad_type = $aliases[$pad_type];
        }

        // fast path: plain "mb_*" calls for UTF-8
        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str);
            if ($pad_length < $str_length) {
                return $str;
            }

            $missing = $pad_length - $str_length;

            if ($pad_type === \STR_PAD_BOTH) {
                // the right side receives the extra character for odd differences
                $leftCount = (int) \floor($missing / 2);
                $rightCount = (int) \ceil($missing / 2);

                $pre = (string) \mb_substr(\str_repeat($pad_string, $leftCount), 0, $leftCount);
                $post = (string) \mb_substr(\str_repeat($pad_string, $rightCount), 0, $rightCount);
            } else {
                $ps_length = (int) \mb_strlen($pad_string);
                $padding = (string) \mb_substr(
                    \str_repeat($pad_string, (int) \ceil($missing / $ps_length)),
                    0,
                    $missing
                );

                if ($pad_type === \STR_PAD_LEFT) {
                    $pre = $padding;
                    $post = '';
                } else {
                    // STR_PAD_RIGHT and every other integer value
                    $pre = '';
                    $post = $padding;
                }
            }

            return $pre . $str . $post;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $str_length = (int) self::strlen($str, $encoding);
        if ($pad_length < $str_length) {
            return $str;
        }

        $missing = $pad_length - $str_length;

        if ($pad_type === \STR_PAD_BOTH) {
            $leftCount = (int) \floor($missing / 2);
            $rightCount = (int) \ceil($missing / 2);

            $pre = (string) self::substr(\str_repeat($pad_string, $leftCount), 0, $leftCount, $encoding);
            $post = (string) self::substr(\str_repeat($pad_string, $rightCount), 0, $rightCount, $encoding);
        } else {
            $ps_length = (int) self::strlen($pad_string, $encoding);
            $padding = (string) self::substr(
                \str_repeat($pad_string, (int) \ceil($missing / $ps_length)),
                0,
                $missing,
                $encoding
            );

            if ($pad_type === \STR_PAD_LEFT) {
                $pre = $padding;
                $post = '';
            } else {
                // STR_PAD_RIGHT and every other integer value
                $pre = '';
                $post = $padding;
            }
        }

        return $pre . $str . $post;
    }
6527
6528
    /**
     * Pad both sides of a string until it reaches the given length.
     *
     * Shortcut for str_pad() with a pad type of STR_PAD_BOTH.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with padding applied
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $padStr, \STR_PAD_BOTH, $encoding);

        return $padded;
    }
6547
6548
    /**
     * Pad the beginning of a string until it reaches the given length.
     *
     * Shortcut for str_pad() with a pad type of STR_PAD_LEFT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with left padding
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $padStr, \STR_PAD_LEFT, $encoding);

        return $padded;
    }
6567
6568
    /**
     * Pad the end of a string until it reaches the given length.
     *
     * Shortcut for str_pad() with a pad type of STR_PAD_RIGHT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with right padding
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $padStr, \STR_PAD_RIGHT, $encoding);

        return $padded;
    }
6587
6588
    /**
     * Repeat a string.
     *
     * The input is passed through self::filter() before it is repeated.
     *
     * @param string $str        <p>The string to be repeated.</p>
     * @param int    $multiplier <p>
     *                           Number of times the input string should be repeated.
     *                           </p>
     *                           <p>
     *                           The multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @return string the repeated string
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
6612
6613
    /**
     * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param mixed $search  <p>
     *                       The value being searched for (the needle).
     *                       An array may be used to designate multiple needles.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value for found search values.
     *                       An array may be used to designate multiple replacements.
     *                       </p>
     * @param mixed $subject <p>
     *                       The string or array being searched and replaced on (the haystack).
     *                       </p>
     *                       <p>
     *                       If subject is an array, the search and replace is performed
     *                       on every entry, and the return value is an array as well.
     *                       </p>
     * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles
     *
     * @return mixed a string or an array with the replaced values
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        /**
         * @psalm-suppress PossiblyNullArgument
         */
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
6658
6659
    /**
     * Replace $search with $replacement, but only if $str starts with $search.
     *
     * An empty $search is special-cased: the replacement is appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_beginning(string $str, string $search, string $replacement): string
    {
        // empty needle: the replacement is appended (also covers an empty $str)
        if ($search === '') {
            return $str . $replacement;
        }

        // nothing can be found at the start of an empty string
        if ($str === '') {
            return '';
        }

        $searchLength = \strlen($search);

        // byte-wise prefix comparison
        if (\strncmp($str, $search, $searchLength) !== 0) {
            return $str;
        }

        return $replacement . \substr($str, $searchLength);
    }
6690
6691
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * The replacement only happens when $str ends with $search. An empty
     * $search is special-cased: the replacement is appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_ending(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            // also covers an empty $str: '' . $replacement
            return $str . $replacement;
        }

        $searchLength = \strlen($search);

        // A needle longer than the haystack can never be a suffix. Checking the
        // length first avoids handing an out-of-range negative offset to a
        // string function (PHP 8 throws a ValueError for that).
        if ($searchLength > \strlen($str)) {
            return $str;
        }

        // byte-wise comparison of the last $searchLength bytes against $search
        if (\substr_compare($str, $search, -$searchLength) === 0) {
            return \substr($str, 0, -$searchLength) . $replacement;
        }

        return $str;
    }
6722
6723
    /**
     * Replace the first occurrence of "$search" in "$subject" with "$replace".
     *
     * The subject is returned unchanged when "$search" is not found.
     *
     * @param string $search  <p>The string to search for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string being searched and replaced on.</p>
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(string $search, string $replace, string $subject): string
    {
        $position = self::strpos($subject, $search);

        if ($position === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $position, (int) self::strlen($search));
    }
6752
6753
    /**
     * Replace the last occurrence of "$search" in "$subject" with "$replace".
     *
     * The subject is returned unchanged when "$search" is not found.
     *
     * @param string $search  <p>The string to search for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string being searched and replaced on.</p>
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $position = self::strrpos($subject, $search);

        if ($position === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $position, (int) self::strlen($search));
    }
6784
6785
    /**
     * Shuffle all the characters of a string.
     *
     * PS: relies on shuffle(), whose randomness is too weak for cryptographic purposes
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the shuffled string
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        $result = '';

        if ($encoding === 'UTF-8') {
            // shuffle the character positions, then collect the characters in that order
            $positions = \range(0, (int) \mb_strlen($str) - 1);
            /** @noinspection NonSecureShuffleUsageInspection */
            \shuffle($positions);

            foreach ($positions as $position) {
                $char = \mb_substr($str, $position, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }

            return $result;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $positions = \range(0, (int) self::strlen($str, $encoding) - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($positions);

        foreach ($positions as $position) {
            $char = self::substr($str, $position, 1, $encoding);
            if ($char !== false) {
                $result .= $char;
            }
        }

        return $result;
    }
6831
6832
    /**
     * Return the substring starting at $start and running up to, but not
     * including, the index $end.
     *
     * Without $end the rest of the string is extracted. A negative $end is
     * counted from the end of the string. A non-negative $end that is not
     * greater than $start yields an empty string.
     *
     * @param string   $str
     * @param int      $start    <p>Initial index from which to begin extraction.</p>
     * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
     *                      characters long, <b>FALSE</b> will be returned.
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        // the UTF-8 fast path uses the plain "mb_*" functions
        $useMb = $encoding === 'UTF-8';
        if ($useMb === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($end !== null && $end >= 0 && $end <= $start) {
            return '';
        }

        if ($end === null || $end < 0) {
            // both cases are relative to the character length of the string
            $charCount = $useMb
                ? (int) \mb_strlen($str)
                : (int) self::strlen($str, $encoding);

            $length = $end === null ? $charCount : $charCount + $end - $start;
        } else {
            $length = $end - $start;
        }

        if ($useMb) {
            return \mb_substr($str, $start, $length);
        }

        return self::substr($str, $start, $length, $encoding);
    }
6881
6882
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Dashes and whitespace become underscores, matched characters are
     * prefixed with an underscore (and lower-cased unless they are ASCII
     * digits), and repeated / surrounding underscores are removed.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string in snake_case
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $normalized = self::normalize_whitespace($str);
        $str = \str_replace('-', '_', $normalized);

        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if ($isKnownEncoding === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // NOTE(review): "|" inside a character class is a literal pipe, so pipe
        // characters are matched by this pattern as well - confirm this is intended.
        $str = (string) \preg_replace_callback(
            '/([\\p{N}|\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $char = $matches[1];

                // only values that survive the int round-trip (ASCII digits) get wrapped
                if ((string) (int) $char === $char) {
                    return '_' . $char . '_';
                }

                $lowered = $encoding === 'UTF-8'
                    ? \mb_strtolower($char)
                    : self::strtolower($char, $encoding);

                return '_' . $lowered;
            },
            $str
        );

        $patterns = [
            '/\\s+/u',           // convert spaces to "_"
            '/^\\s+|\\s+$/u', // trim leading & trailing spaces
            '/_+/',                 // remove double "_"
        ];
        $replacements = [
            '_',
            '',
            '_',
        ];
        $str = (string) \preg_replace($patterns, $replacements, $str);

        // trim leading & trailing "_" + whitespace
        $withoutUnderscores = \trim($str, '_');

        return \trim($withoutUnderscores);
    }
6946
6947
    /**
     * Sort all characters of a string by their code points.
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @return string string of sorted characters
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $codePoints = self::codepoints($str);

        if ($unique) {
            // flipping twice collapses duplicate values
            $codePoints = \array_flip(\array_flip($codePoints));
        }

        if ($desc === false) {
            \asort($codePoints);
        } else {
            \arsort($codePoints);
        }

        return self::string($codePoints);
    }
6972
6973
    /**
     * Convert a string to an array of Unicode characters.
     *
     * When an array is passed, every element is split on its own and the
     * array of results is returned with the original keys.
     *
     * @param int|int[]|string|string[] $str                <p>The string to split into array.</p>
     * @param int                       $length             [optional] <p>Max character length of each array
     *                                                      element.</p>
     * @param bool                      $cleanUtf8          [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool                      $tryToUseMbFunction [optional] <p>Set to false, if you don't want to use
     *                                                      "mb_substr"</p>
     *
     * @return array
     *               <p>An array containing chunks of the input; empty if $length is not positive
     *               or the input is an empty string.</p>
     */
    public static function str_split(
        $str,
        int $length = 1,
        bool $cleanUtf8 = false,
        bool $tryToUseMbFunction = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        if (\is_array($str) === true) {
            // assign by key instead of iterating by reference, so no reference
            // to the last element is left dangling after the loop
            foreach ($str as $key => $value) {
                $str[$key] = self::str_split(
                    $value,
                    $length,
                    $cleanUtf8,
                    $tryToUseMbFunction
                );
            }

            return $str;
        }

        // init
        $str = (string) $str;

        if ($str === '') {
            return [];
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (
            $tryToUseMbFunction === true
            &&
            self::$SUPPORT['mbstring'] === true
        ) {
            $iMax = \mb_strlen($str);
            if ($iMax <= 127) {
                // short strings: pick the characters one by one
                $ret = [];
                for ($i = 0; $i < $iMax; ++$i) {
                    $ret[] = \mb_substr($str, $i, 1);
                }
            } else {
                // longer strings: a single regex pass
                $retArray = [];
                \preg_match_all('/./us', $str, $retArray);
                $ret = $retArray[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $retArray = [];
            \preg_match_all('/./us', $str, $retArray);
            $ret = $retArray[0] ?? [];
        } else {
            // fallback: decode the UTF-8 byte sequences by hand; a byte that does
            // not start a complete, well-formed sequence adds nothing to the result

            $ret = [];
            $len = \strlen($str);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($str[$i] & "\x80") === "\x00") {
                    // single byte (ASCII)
                    $ret[] = $str[$i];
                } elseif (
                    isset($str[$i + 1])
                    &&
                    ($str[$i] & "\xE0") === "\xC0"
                ) {
                    // two byte sequence
                    if (($str[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $str[$i] . $str[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($str[$i + 2])
                    &&
                    ($str[$i] & "\xF0") === "\xE0"
                ) {
                    // three byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($str[$i + 3])
                    &&
                    ($str[$i] & "\xF8") === "\xF0"
                ) {
                    // four byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            // glue $length characters together per element; the callback takes its
            // argument by value, because array_map() does not pass references
            return \array_map(
                static function (array $item): string {
                    return \implode('', $item);
                },
                \array_chunk($ret, $length)
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
7112
7113
    /**
     * Splits the string with the provided pattern, returning an array of
     * strings. An optional integer $limit will truncate the results.
     *
     * NOTE(review): with mbstring available the pattern is handed to mb_split()
     * as a regular expression, while the fallback passes it through
     * preg_quote() and therefore splits on the literal text - confirm which
     * behavior is intended.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @return string[] an array of strings; empty if $limit is 0 or the split fails
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $parts = \mb_split($pattern, $str);

            // mb_split() reports failure with false; answer with an empty list,
            // the same way the preg_split() fallback below does
            if ($parts === false) {
                return [];
            }

            // a non-negative limit keeps only the first $limit pieces
            return $limit > 0 ? \array_slice($parts, 0, $limit) : $parts;
        }

        // ask for one piece more than requested, so that the unsplit remainder
        // ends up in an extra element that can be dropped afterwards
        if ($limit > 0) {
            ++$limit;
        } else {
            $limit = -1;
        }

        $array = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);

        if ($array === false) {
            return [];
        }

        if ($limit > 0 && \count($array) === $limit) {
            \array_pop($array);
        }

        return $array;
    }
7174
7175
    /**
     * Check if the string starts with the given substring.
     *
     * The comparison is byte-wise and case-sensitive. An empty needle always
     * matches, an empty haystack only matches an empty needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // compare only the leading bytes instead of searching the whole
        // haystack for the needle and checking the position afterwards
        return \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
7195
7196
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty $str or an empty $substrings list always yields false
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with $substring
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // iterate by value: nothing is modified here, and a by-reference loop
        // would force a copy of the array and leave a dangling reference
        foreach ($substrings as $substring) {
            if (self::str_starts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
7224
7225
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // use the class wrapper (like the other "str_substr_*_separator"
        // methods) instead of calling "mb_substr()" directly
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
7264
7265
    /**
     * Returns everything that follows the last occurrence of the separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator is not part of $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            // other encodings: go through the class wrappers
            $lastPos = self::strrpos($str, $separator, 0, $encoding);

            return $lastPos === false
                ? ''
                : (string) self::substr(
                    $str,
                    $lastPos + (int) self::strlen($separator, $encoding),
                    null,
                    $encoding
                );
        }

        // UTF-8: use the "mb_*" functions directly
        $lastPos = \mb_strrpos($str, $separator);
        if ($lastPos === false) {
            return '';
        }

        $tailStart = $lastPos + (int) \mb_strlen($separator);

        return (string) \mb_substr($str, $tailStart);
    }
7304
7305
    /**
     * Returns everything that precedes the first occurrence of the separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator is not part of $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            // other encodings: go through the class wrappers
            $firstPos = self::strpos($str, $separator, 0, $encoding);

            return $firstPos === false
                ? ''
                : (string) self::substr($str, 0, $firstPos, $encoding);
        }

        // UTF-8: use the "mb_*" functions directly
        $firstPos = \mb_strpos($str, $separator);

        return $firstPos === false
            ? ''
            : (string) \mb_substr($str, 0, $firstPos);
    }
7348
7349
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = \mb_strrpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                0,
                $offset
            );
        }

        // normalize the encoding before its first use, so that the offset
        // lookup and the slicing below work with the very same encoding
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $offset = self::strrpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr(
            $str,
            0,
            $offset,
            $encoding
        );
    }
7391
7392
    /**
     * Returns the part of the string that starts at the first occurrence of
     * "$needle" (needle included) or, if "$beforeNeedle" is true, the part in
     * front of that occurrence (needle excluded).
     *
     * An empty string is returned if $str or $needle is empty, or if the
     * needle is not part of $str.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // "false" is also the default of the third "mb_strstr()" argument, so
        // the flag can be passed through as it is
        $found = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $beforeNeedle)
            : self::strstr($str, $needle, $beforeNeedle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
7436
7437
    /**
     * Returns the part of the string that starts at the last occurrence of
     * "$needle" (needle included) or, if "$beforeNeedle" is true, the part in
     * front of that occurrence (needle excluded).
     *
     * An empty string is returned if $str or $needle is empty, or if the
     * needle is not part of $str.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // "false" is also the default of the third "mb_strrchr()" argument, so
        // the flag can be passed through as it is
        $found = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $beforeNeedle)
            : self::strrchr($str, $needle, $beforeNeedle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
7481
7482
    /**
     * Wraps $str into the given substring.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @return string string with the substring both prepended and appended
     */
    public static function str_surround(string $str, string $substring): string
    {
        return \implode('', [$substring, $str, $substring]);
    }
7494
7495
    /**
     * Returns a trimmed string with the first letter of each word capitalized.
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * A "word" is every run of characters that are neither whitespace nor
     * part of "$word_define_chars". The remaining characters of each word are
     * lower-cased.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                [optional] <p>An array of words not to capitalize or null.
     *                                                   Default: null</p>
     * @param string              $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null         $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool                $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     * @param bool                $useTrimFirst          [optional] <p>true === trim the input string, first</p>
     * @param string|null         $word_define_chars     [optional] <p>A string of chars that are treated like
     *                                                   whitespace, i.e. as word separators.</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false,
        bool $useTrimFirst = true,
        string $word_define_chars = null
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($useTrimFirst === true) {
            $str = \trim($str);
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // the plain "mb_*" functions are sufficient, as long as no language
        // specific handling and no length preservation is requested
        $useMbFunction = $lang === null && $tryToKeepStringLength === false;

        // explicit comparison instead of a truthiness check, otherwise the
        // valid separator "0" would be ignored silently
        if ($word_define_chars !== null && $word_define_chars !== '') {
            $word_define_chars = \preg_quote($word_define_chars, '/');
        } else {
            $word_define_chars = '';
        }

        $str = (string) \preg_replace_callback(
            '/([^\\s' . $word_define_chars . ']+)/u',
            static function (array $match) use ($tryToKeepStringLength, $lang, $ignore, $useMbFunction, $encoding): string {
                // words from the ignore list are kept untouched
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                    return $match[0];
                }

                if ($useMbFunction === true) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                               . \mb_strtolower(\mb_substr($match[0], 1));
                    }

                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
                }

                return self::ucfirst(
                    self::strtolower(
                        $match[0],
                        $encoding,
                        false,
                        $lang,
                        $tryToKeepStringLength
                    ),
                    $encoding,
                    false,
                    $lang,
                    $tryToKeepStringLength
                );
            },
            $str
        );

        return $str;
    }
7584
7585
    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized. The entries are appended to the built-in list of "small
     * words" and, exactly like the built-in entries, are inserted into the
     * regular expressions as they are (they are not escaped).
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize_for_humans(
        string $str,
        array $ignore = [],
        string $encoding = 'UTF-8'
    ): string {
        $smallWords = \array_merge(
            [
                '(?<!q&)a',
                'an',
                'and',
                'as',
                'at(?!&t)',
                'but',
                'by',
                'en',
                'for',
                'if',
                'in',
                'of',
                'on',
                'or',
                'the',
                'to',
                'v[.]?',
                'via',
                'vs[.]?',
            ],
            $ignore
        );

        $smallWordsRx = \implode('|', $smallWords);
        $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

        $str = \trim($str);

        // an input without any lower-case char is lower-cased first
        if (self::has_lowercase($str) === false) {
            $str = self::strtolower($str, $encoding);
        }

        /**
         * Upper-cases the first char of the first capture group.
         *
         * @param string[] $matches
         *
         * @return string
         */
        $upperFirstOfGroup1 = static function (array $matches) use ($encoding): string {
            return static::ucfirst($matches[1], $encoding);
        };

        /**
         * Keeps the first capture group and upper-cases the first char of the second one.
         *
         * @param string[] $matches
         *
         * @return string
         */
        $keepGroup1UpperFirstOfGroup2 = static function (array $matches) use ($encoding): string {
            return $matches[1] . static::ucfirst($matches[2], $encoding);
        };

        // the main substitutions
        $str = (string) \preg_replace_callback(
            '~\\b (_*) (?:                                                         # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                // preserve leading underscore
                $str = $matches[1];
                if ($matches[2]) {
                    // preserve URLs, domains, emails and file paths
                    $str .= $matches[2];
                } elseif ($matches[3]) {
                    // lower-case small words
                    $str .= self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // capitalize word w/o internal caps
                    $str .= static::ucfirst($matches[4], $encoding);
                } else {
                    // preserve other kinds of word (iPhone)
                    $str .= $matches[5];
                }
                // preserve trailing underscore
                $str .= $matches[6];

                return $str;
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \\b # ...followed by small word
                     ~uxi',
            $keepGroup1UpperFirstOfGroup2,
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
            $upperFirstOfGroup1,
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            $upperFirstOfGroup1,
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        //
        // NOTE(review): the lookbehind below checks for "…" although its
        // comment talks about a hyphen; kept as it is - TODO confirm the intent
        $str = (string) \preg_replace_callback(
            '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
            $keepGroup1UpperFirstOfGroup2,
            $str
        );

        return $str;
    }
7757
7758
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of the string are read as one big-endian number and written
     * as binary digits without leading zeros ("0" if there is no set bit).
     * The digits are derived from each hex digit directly, because
     * "base_convert()" works with floats internally and loses precision for
     * inputs longer than a few bytes.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      <p>false on error</p>
     */
    public static function str_to_binary(string $str)
    {
        $value = \unpack('H*', $str);
        if ($value === false) {
            return false;
        }

        // every hex digit maps to exactly four binary digits
        /** @noinspection OffsetOperationsInspection */
        $bits = \strtr(
            $value[1],
            [
                '0' => '0000',
                '1' => '0001',
                '2' => '0010',
                '3' => '0011',
                '4' => '0100',
                '5' => '0101',
                '6' => '0110',
                '7' => '0111',
                '8' => '1000',
                '9' => '1001',
                'a' => '1010',
                'b' => '1011',
                'c' => '1100',
                'd' => '1101',
                'e' => '1110',
                'f' => '1111',
            ]
        );

        // same output format as "base_convert()": no leading zeros
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
7776
7777
    /**
     * Splits a string into its lines.
     *
     * "\r\n", "\r" and "\n" are each treated as exactly one line break, so
     * blank lines are kept as empty strings unless "$removeEmptyValues" is
     * used.
     *
     * @param string   $str
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
    {
        if ($str === '') {
            return $removeEmptyValues === true ? [] : [''];
        }

        // "\r\n" comes first, so that it is consumed as one break; a pattern
        // like "[\r\n]{1,2}" would also merge "\n\n" and drop the blank line
        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $return = \mb_split("\r\n|\r|\n", $str);
        } else {
            $return = \preg_split("/\r\n|\r|\n/u", $str);
        }

        if ($return === false) {
            return $removeEmptyValues === true ? [] : [''];
        }

        // nothing to filter
        if (
            $removeShortValues === null
            &&
            $removeEmptyValues === false
        ) {
            return $return;
        }

        return self::reduce_string_array(
            $return,
            $removeEmptyValues,
            $removeShortValues
        );
    }
7815
7816
    /**
     * Splits a string into an array of words.
     *
     * The string is split at every word and the words themselves are kept as
     * captured delimiters, so the result also contains the text between the
     * words.
     *
     * @param string   $str
     * @param string   $charList          <p>Additional chars for the definition of "words".</p>
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $charList = '',
        bool $removeEmptyValues = false,
        int $removeShortValues = null
    ): array {
        $fallback = $removeEmptyValues === true ? [] : [''];

        if ($str === '') {
            return $fallback;
        }

        $wordClass = self::rxClass($charList, '\pL');

        $pieces = \preg_split("/({$wordClass}+(?:[\p{Pd}’']{$wordClass}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($pieces === false) {
            return $fallback;
        }

        // nothing to filter
        if ($removeEmptyValues === false && $removeShortValues === null) {
            return $pieces;
        }

        $words = [];
        foreach (self::reduce_string_array($pieces, $removeEmptyValues, $removeShortValues) as $key => $piece) {
            $words[$key] = (string) $piece;
        }

        return $words;
    }
7863
7864
    /**
7865
     * alias for "UTF8::to_ascii()"
7866
     *
7867
     * @param string $str
7868
     * @param string $unknown
7869
     * @param bool   $strict
7870
     *
7871
     * @return string
7872
     *
7873
     * @see UTF8::to_ascii()
7874
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
7875
     */
7876 7
    public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
    {
        // Deprecated alias: all arguments are forwarded unchanged to to_ascii().
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
7880
7881
    /**
7882
     * Truncates the string to a given length. If $substring is provided, and
7883
     * truncating occurs, the string is further truncated so that the substring
7884
     * may be appended without exceeding the desired length.
7885
     *
7886
     * @param string $str
7887
     * @param int    $length    <p>Desired length of the truncated string.</p>
7888
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
7889
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
7890
     *
7891
     * @return string string after truncating
7892
     */
7893 22
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        // Truncates $str to $length characters. When truncation happens and $substring is
        // given, room is reserved for it and it is appended to the shortened string.
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // Fast path: use mbstring directly.
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                // Reserve room for the suffix. Clamp at 0: a negative length would make
                // mb_substr() cut from the END of the string and return far more text
                // than requested (e.g. 'abcdef', 2, '....' used to yield 'abcd....').
                $length = \max(0, $length - (int) \mb_strlen($substring));
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length) . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            // Same clamping as in the UTF-8 path above.
            $length = \max(0, $length - (int) self::strlen($substring, $encoding));
        }

        return (
               (string) self::substr(
                   $str,
                   0,
                   $length,
                   $encoding
               )
               ) . $substring;
    }
7938
7939
    /**
7940
     * Truncates the string to a given length, while ensuring that it does not
7941
     * split words. If $substring is provided, and truncating occurs, the
7942
     * string is further truncated so that the substring may be appended without
7943
     * exceeding the desired length.
7944
     *
7945
     * @param string $str
7946
     * @param int    $length                          <p>Desired length of the truncated string.</p>
7947
     * @param string $substring                       [optional] <p>The substring to append if it can fit. Default:
7948
     *                                                ''</p>
7949
     * @param string $encoding                        [optional] <p>Default: 'UTF-8'</p>
7950
     * @param bool   $ignoreDoNotSplitWordsForOneWord [optional] <p>Default: false</p>
7951
     *
7952
     * @return string string after truncating
7953
     */
7954 47
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignoreDoNotSplitWordsForOneWord = false
    ): string {
        if ($length <= 0 || $str === '') {
            return $substring;
        }

        // The literal 'UTF-8' encoding uses mbstring directly; everything else goes
        // through the class helpers with a normalized encoding name.
        $useMb = ($encoding === 'UTF-8');
        if ($useMb === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $fullLength = $useMb
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);
        if ($length >= $fullLength) {
            return $str;
        }

        // need to further trim the string so we can append the substring
        $length -= ($useMb
            ? (int) \mb_strlen($substring)
            : (int) self::strlen($substring, $encoding));
        if ($length <= 0) {
            return $substring;
        }

        $head = $useMb
            ? \mb_substr($str, 0, $length)
            : self::substr($str, 0, $length, $encoding);
        if ($head === false) {
            return '';
        }

        // Position of the first space at or after the last kept character.
        $nextSpace = $useMb
            ? \mb_strpos($str, ' ', $length - 1)
            : self::strpos($str, ' ', $length - 1, $encoding);

        // The cut falls exactly in front of a space: no word was split.
        if ($nextSpace === $length) {
            return $head . $substring;
        }

        // Otherwise fall back to the last space inside the kept part.
        $lastSpace = $useMb
            ? \mb_strrpos($head, ' ', 0)
            : self::strrpos($head, ' ', 0, $encoding);

        $cutAtLastSpace = $lastSpace !== false
                          || ($nextSpace !== false && $ignoreDoNotSplitWordsForOneWord === false);

        if ($cutAtLastSpace) {
            // (int) false === 0, so a kept part without any space is dropped entirely here.
            $head = $useMb
                ? (string) \mb_substr($head, 0, (int) $lastSpace)
                : (string) self::substr($head, 0, (int) $lastSpace, $encoding);
        }

        return $head . $substring;
    }
8033
8034
    /**
8035
     * Returns a lowercase and trimmed string separated by underscores.
8036
     * Underscores are inserted before uppercase characters (with the exception
8037
     * of the first character of the string), and in place of spaces as well as
8038
     * dashes.
8039
     *
8040
     * @param string $str
8041
     *
8042
     * @return string the underscored string
8043
     */
8044 16
    public static function str_underscored(string $str): string
    {
        // The underscore is the delimiter handed to the generic str_delimit() helper.
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
8048
8049
    /**
8050
     * Returns an UpperCamelCase version of the supplied string. It trims
8051
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
8052
     * and underscores, and removes spaces, dashes, underscores.
8053
     *
8054
     * @param string      $str                   <p>The input string.</p>
8055
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
8056
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
8057
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
8058
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
8059
     *
8060
     * @return string string in UpperCamelCase
8061
     */
8062 13
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // Step 1: camelize the input (only $str and $encoding are passed on here).
        $camelized = self::str_camelize($str, $encoding);

        // Step 2: upper-case the first character; the remaining options apply to this step.
        return self::ucfirst($camelized, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
    }
8071
8072
    /**
8073
     * alias for "UTF8::ucfirst()"
8074
     *
8075
     * @param string      $str
8076
     * @param string      $encoding
8077
     * @param bool        $cleanUtf8
8078
     * @param string|null $lang
8079
     * @param bool        $tryToKeepStringLength
8080
     *
8081
     * @return string
8082
     *
8083
     * @see UTF8::ucfirst()
8084
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
8085
     */
8086 39
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // Deprecated alias: every argument is forwarded unchanged to ucfirst().
        $result = self::ucfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $result;
    }
8101
8102
    /**
8103
     * Counts number of words in the UTF-8 string.
8104
     *
8105
     * @param string $str      <p>The input string.</p>
8106
     * @param int    $format   [optional] <p>
8107
     *                         <strong>0</strong> => return a number of words (default)<br>
8108
     *                         <strong>1</strong> => return an array of words<br>
8109
     *                         <strong>2</strong> => return an array of words with word-offset as key
8110
     *                         </p>
8111
     * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
8112
     *
8113
     * @return int|string[] The number of words in the string
8114
     */
8115 2
    public static function str_word_count(string $str, int $format = 0, string $charlist = '')
    {
        // str_to_words() yields an alternating list: non-word text at even indexes,
        // words at odd indexes.
        $parts = self::str_to_words($str, $charlist);
        $partCount = \count($parts);

        // Any format other than 1 or 2: just the number of words.
        if ($format !== 1 && $format !== 2) {
            return (int) (($partCount - 1) / 2);
        }

        $words = [];

        if ($format === 1) {
            // Plain list of words.
            for ($idx = 1; $idx < $partCount; $idx += 2) {
                $words[] = $parts[$idx];
            }

            return $words;
        }

        // Format 2: words keyed by their character offset within $str.
        $position = (int) self::strlen($parts[0]);
        $idx = 1;
        while ($idx < $partCount) {
            $words[$position] = $parts[$idx];
            // Advance past the word itself and the non-word text that follows it.
            $position += (int) self::strlen($parts[$idx]) + (int) self::strlen($parts[$idx + 1]);
            $idx += 2;
        }

        return $words;
    }
8139
8140
    /**
8141
     * Case-insensitive string comparison.
8142
     *
8143
     * INFO: Case-insensitive version of UTF8::strcmp()
8144
     *
8145
     * @param string $str1     <p>The first string.</p>
8146
     * @param string $str2     <p>The second string.</p>
8147
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
8148
     *
8149
     * @return int
8150
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
8151
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
8152
     *             <strong>0</strong> if they are equal
8153
     */
8154 23
    public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        // Case-fold both operands first, then defer to the case-sensitive comparison.
        $foldedFirst = self::strtocasefold($str1, true, false, $encoding, null, false);
        $foldedSecond = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($foldedFirst, $foldedSecond);
    }
8161
8162
    /**
8163
     * alias for "UTF8::strstr()"
8164
     *
8165
     * @param string $haystack
8166
     * @param string $needle
8167
     * @param bool   $before_needle
8168
     * @param string $encoding
8169
     * @param bool   $cleanUtf8
8170
     *
8171
     * @return false|string
8172
     *
8173
     * @see UTF8::strstr()
8174
     */
8175 2
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        // Alias: every argument is forwarded unchanged to strstr().
        $found = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

        return $found;
    }
8184
8185
    /**
8186
     * Case-sensitive string comparison.
8187
     *
8188
     * @param string $str1 <p>The first string.</p>
8189
     * @param string $str2 <p>The second string.</p>
8190
     *
8191
     * @return int
8192
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
8193
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
8194
     *             <strong>0</strong> if they are equal
8195
     */
8196 29
    public static function strcmp(string $str1, string $str2): int
    {
        // Byte-identical strings are equal; no normalization needed.
        if ($str1 === $str2) {
            return 0;
        }

        // Compare the canonically decomposed (NFD) forms so that composed and
        // decomposed spellings of the same text compare as equal.
        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // Normalizer::normalize() does not return a string when it fails (e.g. on
        // invalid UTF-8). Fall back to the raw input in that case instead of passing
        // a non-string to \strcmp(), which would compare it as '' or raise a TypeError.
        return \strcmp(
            \is_string($normalized1) ? $normalized1 : $str1,
            \is_string($normalized2) ? $normalized2 : $str2
        );
    }
8207
8208
    /**
8209
     * Find length of initial segment not matching mask.
8210
     *
8211
     * @param string $str
8212
     * @param string $charList
8213
     * @param int    $offset
8214
     * @param int    $length
8215
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
8216
     *
8217
     * @return int
8218
     */
8219 12
    public static function strcspn(
        string $str,
        string $charList,
        int $offset = null,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        // 'UTF-8' and 'CP850' are used as given; any other name is normalized first.
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Without a mask nothing can match, so the whole string is the segment.
        if ($charList === '') {
            return (int) self::strlen($str, $encoding);
        }

        // Restrict the search to the requested slice, if any.
        if ($offset !== null || $length !== null) {
            if ($encoding !== 'UTF-8') {
                /** @noinspection UnnecessaryCastingInspection */
                $slice = self::substr($str, (int) $offset, $length, $encoding);
            } elseif ($length === null) {
                /** @noinspection UnnecessaryCastingInspection */
                $slice = \mb_substr($str, (int) $offset);
            } else {
                /** @noinspection UnnecessaryCastingInspection */
                $slice = \mb_substr($str, (int) $offset, $length);
            }

            if ($slice === false) {
                return 0;
            }

            $str = $slice;
        }

        if ($str === '') {
            return 0;
        }

        // Lazily capture everything in front of the first character from the mask.
        $found = [];
        if (!\preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $found)) {
            // No mask character present: the whole (sliced) string counts.
            return (int) self::strlen($str, $encoding);
        }

        $segmentLength = self::strlen($found[1], $encoding);

        return $segmentLength === false ? 0 : $segmentLength;
    }
8271
8272
    /**
8273
     * alias for "UTF8::stristr()"
8274
     *
8275
     * @param string $haystack
8276
     * @param string $needle
8277
     * @param bool   $before_needle
8278
     * @param string $encoding
8279
     * @param bool   $cleanUtf8
8280
     *
8281
     * @return false|string
8282
     *
8283
     * @see UTF8::stristr()
8284
     */
8285 1
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        // Alias: every argument is forwarded unchanged to stristr().
        $found = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

        return $found;
    }
8294
8295
    /**
8296
     * Create a UTF-8 string from code points.
8297
     *
8298
     * INFO: opposite to UTF8::codepoints()
8299
     *
8300
     * @param array $array <p>Integer or Hexadecimal codepoints.</p>
8301
     *
8302
     * @return string UTF-8 encoded string
8303
     */
8304 4
    public static function string(array $array): string
    {
        // Convert each code point with self::chr(), then glue the pieces together.
        $toChar = [self::class, 'chr'];
        $chars = \array_map($toChar, $array);

        return \implode('', $chars);
    }
8317
8318
    /**
8319
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
8320
     *
8321
     * @param string $str <p>The input string.</p>
8322
     *
8323
     * @return bool
8324
     *              <strong>true</strong> if the string has BOM at the start,<br>
8325
     *              <strong>false</strong> otherwise
8326
     */
8327 6
    public static function string_has_bom(string $str): bool
    {
        // Only the keys of self::$BOM (the BOM byte sequences) are needed here.
        // Iterating over the keys avoids the former by-reference loop, which turned the
        // entries of the static array into references and left a dangling one behind.
        foreach (\array_keys(self::$BOM) as $bomString) {
            // strpos() === 0 means $str starts with this BOM.
            if (\strpos($str, $bomString) === 0) {
                return true;
            }
        }

        return false;
    }
8338
8339
    /**
8340
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
8341
     *
8342
     * @see http://php.net/manual/en/function.strip-tags.php
8343
     *
8344
     * @param string $str            <p>
8345
     *                               The input string.
8346
     *                               </p>
8347
     * @param string $allowable_tags [optional] <p>
8348
     *                               You can use the optional second parameter to specify tags which should
8349
     *                               not be stripped.
8350
     *                               </p>
8351
     *                               <p>
8352
     *                               HTML comments and PHP tags are also stripped. This is hardcoded and
8353
     *                               can not be changed with allowable_tags.
8354
     *                               </p>
8355
     * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
8356
     *
8357
     * @return string the stripped string
8358
     */
8359 4
    public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        // Optionally drop invalid UTF-8 before handing the text to PHP's strip_tags().
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // Pass the allow-list only when the caller supplied one.
        return $allowable_tags === null
            ? \strip_tags($str)
            : \strip_tags($str, $allowable_tags);
    }
8375
8376
    /**
8377
     * Strip all whitespace characters. This includes tabs and newline
8378
     * characters, as well as multibyte whitespace such as the thin space
8379
     * and ideographic space.
8380
     *
8381
     * @param string $str
8382
     *
8383
     * @return string
8384
     */
8385 36
    public static function strip_whitespace(string $str): string
    {
        // Remove every run of whitespace; the "u" modifier makes the pattern Unicode-aware.
        // The cast turns a failed replacement (null) into an empty string.
        return $str === ''
            ? ''
            : (string) \preg_replace('/[[:space:]]+/u', '', $str);
    }
8393
8394
    /**
8395
     * Finds position of first occurrence of a string within another, case insensitive.
8396
     *
8397
     * @see http://php.net/manual/en/function.mb-stripos.php
8398
     *
8399
     * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
8400
     * @param string $needle    <p>The string to find in haystack.</p>
8401
     * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
8402
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
8403
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
8404
     *
8405
     * @return false|int
8406
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
8407
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
8408
     */
8409 24
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        // Nothing can be found in, or with, an empty string.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters precede $needle in $haystack.
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stripos($haystack, $needle, $offset, $encoding);
            }

            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        //
        // fallback via intl
        //

        $intlUsable = $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
                      && $offset >= 0 // grapheme_stripos() can't handle negative offset
                      && self::$SUPPORT['intl'] === true;
        if ($intlUsable) {
            $graphemePos = \grapheme_stripos($haystack, $needle, $offset);
            if ($graphemePos !== false) {
                return $graphemePos;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both sides, then search case-sensitively
        //

        $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $foldedNeedle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($foldedHaystack, $foldedNeedle, $offset, $encoding);
    }
8469
8470
    /**
8471
     * Returns all of haystack starting from and including the first occurrence of needle to the end.
8472
     *
8473
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
8474
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
8475
     * @param bool   $before_needle [optional] <p>
8476
     *                              If <b>TRUE</b>, it returns the part of the
8477
     *                              haystack before the first occurrence of the needle (excluding the needle).
8478
     *                              </p>
8479
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
8480
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
8481
     *
8482
     * @return false|string a sub-string,<br>or <strong>false</strong> if needle is not found
8483
     */
8484 12
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        // Case-insensitive search for $needle; returns the part of $haystack from the
        // first match onwards (or the part before it when $before_needle is true),
        // or false if there is no match.
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // The needle can only be empty here if cleaning removed all of it.
        // A strict comparison is required: the former truthiness test (!$needle)
        // also treated the valid needle '0' as empty and returned the whole haystack.
        if ($needle === '') {
            return $haystack;
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via regex: lazily capture everything in front of the first match
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // Skip the captured prefix; the rest starts at the needle.
        return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
    }
8553
8554
    /**
8555
     * Get the string length, not the byte-length!
8556
     *
8557
     * @see http://php.net/manual/en/function.mb-strlen.php
8558
     *
8559
     * @param string $str       <p>The string being checked for length.</p>
8560
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
8561
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
8562
     *
8563
     * @return false|int
8564
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
8565
     *                   $encoding.
8566
     *                   (One multi-byte character counted as +1).
8567
     *                   <br>
8568
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
8569
     *                   chars.
8570
     */
8571 173
    public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($str === '') {
            return 0;
        }

        // 'UTF-8' and 'CP850' are used as given; any other name is normalized first.
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strlen($str)
                : \mb_strlen($str, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strlen($str);
        }

        // Warn when neither mbstring nor iconv is available for a non-UTF-8 encoding.
        $noEncodingSupport = $encoding !== 'UTF-8'
                             && self::$SUPPORT['mbstring'] === false
                             && self::$SUPPORT['iconv'] === false;
        if ($noEncodingSupport) {
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconvLength = \iconv_strlen($str, $encoding);
            if ($iconvLength !== false) {
                return $iconvLength;
            }
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strlen()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $graphemeLength = \grapheme_strlen($str);
            if ($graphemeLength !== null) {
                return $graphemeLength;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php: count the characters matched by a UTF-8 aware regex
        //

        \preg_match_all('/./us', $str, $chars);

        $charCount = \count($chars[0]);

        // No match at all for a non-empty string is reported as a failure.
        return $charCount === 0 ? false : $charCount;
    }
8668
8669
    /**
8670
     * Get string length in byte.
8671
     *
8672
     * @param string $str
8673
     *
8674
     * @return int
8675
     */
8676
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // "mb_" is available if overload is used, so use it with an 8-bit encoding;
        // otherwise the plain strlen() is used.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850') // 8-BIT
            : \strlen($str);
    }
8689
8690
    /**
8691
     * Case insensitive string comparisons using a "natural order" algorithm.
8692
     *
8693
     * INFO: natural order version of UTF8::strcasecmp()
8694
     *
8695
     * @param string $str1     <p>The first string.</p>
8696
     * @param string $str2     <p>The second string.</p>
8697
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
8698
     *
8699
     * @return int
8700
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
8701
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
8702
     *             <strong>0</strong> if they are equal
8703
     */
8704 2
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        // Case-fold both operands first, then defer to the natural-order comparison.
        $foldedFirst = self::strtocasefold($str1, true, false, $encoding, null, false);
        $foldedSecond = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($foldedFirst, $foldedSecond);
    }
8711
8712
    /**
     * Compare two strings using a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcmp()
     *
     * Identical strings short-circuit to 0; otherwise both strings are run
     * through UTF8::strtonatfold() before PHP's native strnatcmp() is applied.
     *
     * @see http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        if ($str1 !== $str2) {
            $left = (string) self::strtonatfold($str1);
            $right = (string) self::strtonatfold($str2);

            return \strnatcmp($left, $right);
        }

        // byte-identical input, nothing to fold or compare
        return 0;
    }
8738
8739
    /**
     * Compare the first n characters of two strings, ignoring case.
     *
     * Both inputs are passed through UTF8::strtocasefold() and the results
     * are handed to UTF8::strncmp() together with the length.
     *
     * @see http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strncmp($folded1, $folded2, $len);
    }
8766
8767
    /**
     * Compare the first n characters of two strings.
     *
     * Each string is cut down to its first $len characters and the two
     * prefixes are then compared with UTF8::strcmp().
     *
     * @see http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // only normalize encodings that are not already in canonical form
        $isCanonical = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isCanonical) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($encoding !== 'UTF-8') {
            $prefix1 = (string) self::substr($str1, 0, $len, $encoding);
            $prefix2 = (string) self::substr($str2, 0, $len, $encoding);
        } else {
            // UTF-8: call "mb_substr()" directly, without an explicit encoding argument
            $prefix1 = (string) \mb_substr($str1, 0, $len);
            $prefix2 = (string) \mb_substr($str2, 0, $len);
        }

        return self::strcmp($prefix1, $prefix2);
    }
8802
8803
    /**
     * Search a string for any character out of a given set.
     *
     * The character list is turned into a regex character class via
     * UTF8::rxClass() and matched against the haystack in "/us" mode.
     *
     * @see http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>This parameter is case sensitive.</p>
     *
     * @return false|string
     *                      <p>The rest of the haystack starting at the first matching character,
     *                      or false if nothing matches or if one of the arguments is empty.</p>
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack !== '' && $char_list !== '') {
            $pattern = '/' . self::rxClass($char_list) . '/us';

            if (\preg_match($pattern, $haystack, $match)) {
                // locate the matched character again to get the cut position
                $start = (int) \strpos($haystack, $match[0]);

                return \substr($haystack, $start);
            }
        }

        return false;
    }
8825
8826
    /**
     * Find position of first occurrence of string in a string.
     *
     * The lookup is delegated to the first usable backend, in this order:
     * mbstring, native strpos() for "CP850" / "ASCII", intl (grapheme), iconv,
     * native strpos() for pure ASCII input and finally a plain PHP fallback.
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string.<br> If needle is not found it returns false.
     */
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strpos($haystack, $needle, $offset);
            }

            return \mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //
        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // A negative offset counts from the end of the haystack: translate it into the
        // matching non-negative character index *before* slicing, so that the returned
        // position stays relative to the start of the original haystack (same result as
        // the "mb_strpos()" path) instead of being relative to the sliced tail.
        if ($offset < 0) {
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystackTmp === false) {
            $haystackTmp = '';
        }
        $haystack = (string) $haystackTmp;

        // byte position inside the sliced haystack ...
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        // ... converted into a character position and shifted back by the offset
        if ($pos) {
            return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        return $offset;
    }
8973
8974
    /**
     * Find the byte position of the first occurrence of a string in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @return false|int The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found, or if haystack or needle
     *                   is empty, it returns false.
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // When the "mbstring_func_overload" support flag is set, go through
        // "mb_strpos()" with a single-byte charset to get a byte position.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strpos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strpos($haystack, $needle, $offset);
    }
9003
9004
    /**
     * Finds the last occurrence of a character in a string within another.
     *
     * With mbstring the call is forwarded to "mb_strrchr()". Without it, only the
     * first character of the needle is searched for (via iconv or UTF8::strrpos())
     * and the haystack is cut at the position found.
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or false if needle is not found
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // invalid characters in front of the needle would lead to a wrong
            // position in "mb_strpos()" and "iconv_strpos()", so strip them first
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // binary || ascii only: native function
        //

        $isSingleByte = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($isSingleByte && $before_needle === false) {
            return \strrchr($haystack, $needle);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // iconv or vanilla php: both look for the first needle character only
        //

        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        $lastPos = self::$SUPPORT['iconv'] === true
            ? \iconv_strrpos($haystack, $needle, $encoding)
            : self::strrpos($haystack, $needle, 0, $encoding);
        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
9126
9127
    /**
     * Reverse the order of the characters in a string.
     *
     * The input is passed through UTF8::emoji_encode() before and
     * UTF8::emoji_decode() after the reversal. For UTF-8 the string is walked
     * backwards per grapheme (intl) or per "mb_" character; for every other
     * encoding UTF8::strlen() / UTF8::substr() are used.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the string with characters in the reverse sequence
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::emoji_encode($str, true);
        $result = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            for ($idx = (int) self::strlen($str, $encoding) - 1; $idx >= 0; --$idx) {
                $piece = self::substr($str, $idx, 1, $encoding);
                if ($piece !== false) {
                    $result .= $piece;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($idx = (int) \grapheme_strlen($str) - 1; $idx >= 0; --$idx) {
                $piece = \grapheme_substr($str, $idx, 1);
                if ($piece !== false) {
                    $result .= $piece;
                }
            }
        } else {
            for ($idx = (int) \mb_strlen($str) - 1; $idx >= 0; --$idx) {
                $piece = \mb_substr($str, $idx, 1);
                if ($piece !== false) {
                    $result .= $piece;
                }
            }
        }

        return self::emoji_decode($result, true);
    }
9179
9180
    /**
     * Finds the last occurrence of a character in a string within another, case insensitive.
     *
     * With mbstring the call is forwarded to "mb_strrichr()". Without it, only the
     * first character of the needle is searched for via UTF8::strripos() and the
     * haystack is cut at the position found.
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or<br>false if needle is not found
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // invalid characters in front of the needle would lead to a wrong
            // position in "mb_strpos()" and "iconv_strpos()", so strip them first
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // vanilla php: look for the first needle character only
        //

        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        $lastPos = self::strripos($haystack, $needle, 0, $encoding);
        if ($lastPos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $lastPos, $encoding)
            : self::substr($haystack, $lastPos, null, $encoding);
    }
9255
9256
    /**
     * Find position of last occurrence of a case-insensitive string.
     *
     * The lookup is delegated to the first usable backend, in this order:
     * mbstring, native strripos() for "CP850" / "ASCII", intl (grapheme),
     * native strripos() for pure ASCII input and finally UTF8::strrpos()
     * on case-folded copies of both strings.
     *
     * @param string     $haystack  <p>The string to look in.</p>
     * @param int|string $needle    <p>The string to look for.</p>
     * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // a non-negative integer needle is converted via UTF8::chr(),
        // because iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // binary || ascii only: native function
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strripos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // intl: "grapheme_strripos()" can't handle other encodings than UTF-8
        // and can't handle a negative offset
        //

        $canUseIntl = $encoding === 'UTF-8'
                      &&
                      $offset >= 0
                      &&
                      self::$SUPPORT['intl'] === true;
        if ($canUseIntl) {
            $intlPos = \grapheme_strripos($haystack, $needle, $offset);
            if ($intlPos !== false) {
                return $intlPos;
            }
        }

        //
        // ascii only: native function
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // vanilla php: case-fold both strings, then search case-sensitive
        //

        $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding);
        $foldedNeedle = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($foldedHaystack, $foldedNeedle, $offset, $encoding, $cleanUtf8);
    }
9366
9367
    /**
     * Find the byte position of the last occurrence of a string within another, case insensitive.
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @return false|int return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found or if haystack or
     *                   needle is empty
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // When the "mbstring_func_overload" support flag is set, go through
        // "mb_strripos()" with a single-byte charset to get a byte position.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strripos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strripos($haystack, $needle, $offset);
    }
9398
9399
    /**
     * Find position of last occurrence of a string in a string.
     *
     * The lookup is delegated to the first usable backend, in this order:
     * mbstring, native strrpos() for "CP850" / "ASCII", intl (grapheme),
     * native strrpos() for pure ASCII input and finally a plain PHP fallback.
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                              into the string. Negative values will stop searching at an arbitrary point prior to
     *                              the end of the string.
     *                              </p>
     * @param string     $encoding  [optional] <p>Set the charset.</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strrpos($haystack, $needle, $offset);
            }

            return \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strrpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $offset >= 0 // grapheme_strrpos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Slice the haystack according to the offset. The requested encoding is
        // forwarded to the helpers (as the vanilla fallback of UTF8::strpos() does),
        // so slicing and position counting use the same charset as the caller asked for.
        $haystackTmp = null;
        if ($offset > 0) {
            // skip the first $offset characters
            $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        } elseif ($offset < 0) {
            // drop the last |$offset| characters, the result is then relative to position 0
            $haystackTmp = self::substr($haystack, 0, $offset, $encoding);
            $offset = 0;
        }

        if ($haystackTmp !== null) {
            if ($haystackTmp === false) {
                $haystackTmp = '';
            }
            $haystack = (string) $haystackTmp;
        }

        // byte position inside the (sliced) haystack ...
        $pos = \strrpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        $strTmp = \substr($haystack, 0, $pos);
        if ($strTmp === false) {
            return false;
        }

        // ... converted into a character position and shifted back by the offset
        return $offset + (int) self::strlen($strTmp, $encoding);
    }
9536
9537
    /**
     * Find the byte position of the last occurrence of a string in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *
     * @return false|int The numeric position of the last occurrence of needle in the
     *                   haystack string. If needle is not found, or if haystack or needle
     *                   is empty, it returns false.
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // When the "mbstring_func_overload" support flag is set, go through
        // "mb_strrpos()" with a single-byte charset to get a byte position.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strrpos($haystack, $needle, $offset);
    }
9567
9568
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $mask     <p>The mask of chars.</p>
     * @param int    $offset   [optional] <p>If set (together with / instead of $length), only the matching
     *                         sub-string of $str is inspected.</p>
     * @param int    $length   [optional] <p>See $offset.</p>
     * @param string $encoding [optional] <p>Set the charset.</p>
     *
     * @return int
     *             <p>Length of the leading segment, or 0 if nothing matches or if the (sliced) string
     *             or the mask is empty.</p>
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // reduce the input to the requested window first
        $needsSlice = $offset || $length !== null;
        if ($needsSlice) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $str = (string) \mb_substr($str, $offset, $length);
            } else {
                $str = (string) \mb_substr($str, $offset);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        // match the longest prefix that only consists of mask characters
        $found = [];
        $pattern = '/^' . self::rxClass($mask) . '+/u';
        if (!\preg_match($pattern, $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
9611
9612
    /**
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
     *
     * The implementation tries, in this order: mbstring, native "strstr()" for CP850 / ASCII,
     * intl ("grapheme_strstr()", UTF-8 only), native "strstr()" for pure ASCII input and
     * finally a regex based fallback.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, the part of the haystack before the first
     *                              occurrence of the needle (excluding the needle) is returned.
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      A sub-string,<br>or <strong>false</strong> if needle is not found
     *                      or if haystack / needle is an empty string
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // strip invalid byte sequences from both inputs before searching
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $isUtf8 = $encoding === 'UTF-8';

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $isUtf8
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strstr($haystack, $needle, $before_needle);
        }

        if (!$isUtf8 && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strstr()" can't handle other encodings
        if ($isUtf8 && self::$SUPPORT['intl'] === true) {
            $viaIntl = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($viaIntl !== false) {
                return $viaIntl;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // capture (non-greedy) everything in front of the first needle
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // skip the captured prefix, keep needle + rest
        $prefixLength = (int) self::strlen($match[1]);

        return self::substr($haystack, $prefixLength);
    }
9720
9721
    /**
     * Byte-wise variant of "strstr()": finds first occurrence of a string within another.
     *
     * @param string $haystack      <p>The string from which to get the first occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the first occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the first occurrence of needle to the end.
     *                              </p>
     *
     * @return false|string
     *                      <p>The portion of haystack, or <strong>false</strong> if needle is not found
     *                      or if haystack / needle is an empty string.</p>
     */
    public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
    {
        // nothing to search in / nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strstr($haystack, $needle, $before_needle);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit encoding
        return \mb_strstr($haystack, $needle, $before_needle, 'CP850');
    }
9756
9757
    /**
     * Unicode transformation for case-less matching.
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str       <p>The input string.</p>
     * @param bool        $full      [optional] <p>
     *                               <b>true</b>, replace full case folding chars (default)<br>
     *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                               </p>
     * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding  [optional] <p>Set the charset.</p>
     * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower     [optional] <p>Use lowercase string, otherwise use uppercase string.
     *                               Only a strict <b>true</b> selects lowercase. PS: uppercase
     *                               is for some languages better ...</p>
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $cleanUtf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // drop invalid byte sequences before any case mapping is done
            $str = self::clean($str);
        }

        $str = self::fixStrCaseHelper($str, $lower, $full);

        $toLower = $lower === true;

        // language- or charset-specific handling is delegated to strtolower() / strtoupper()
        if ($lang !== null || $encoding !== 'UTF-8') {
            return $toLower
                ? self::strtolower($str, $encoding, false, $lang)
                : self::strtoupper($str, $encoding, false, $lang);
        }

        // plain UTF-8 without language: use "mb_" directly
        return $toLower ? \mb_strtolower($str) : \mb_strtoupper($str);
    }
9809
9810
    /**
     * Make a string lowercase.
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                   <p>The string being lowercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr.
     *                                           Needs intl, otherwise a warning is triggered and the
     *                                           generic "mb_strtolower()" result is returned.</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // drop invalid byte sequences before any case mapping is done
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // fast path: plain UTF-8 without language
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang === null) {
            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        if (self::$SUPPORT['intl'] !== true) {
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        // lazy-load the list of transliterator ids
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $transliteratorId = $lang . '-Lower';
        if (!\in_array($transliteratorId, self::$INTL_TRANSLITERATOR_LIST, true)) {
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

            // unknown language: use the language independent transliterator
            $transliteratorId = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        return (string) \transliterator_transliterate($transliteratorId, $str);
    }
9879
9880
    /**
     * Make a string uppercase.
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                   <p>The string being uppercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr.
     *                                           Needs intl, otherwise a warning is triggered and the
     *                                           generic "mb_strtoupper()" result is returned.</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length
     *                                           (the string is pre-processed via fixStrCaseHelper()).</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // drop invalid byte sequences before any case mapping is done
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, false);
        }

        // fast path: plain UTF-8 without language
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // lazy-load the list of transliterator ids
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $langCode = $lang . '-Upper';
                if (!\in_array($langCode, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl is available here, the language itself is not supported
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    // unknown language: use the language independent transliterator
                    $langCode = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($langCode, $str);
            }

            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
9949
9950
    /**
     * Translate characters or replace sub-strings.
     *
     * Behaviour depends on $to:
     * - $to is '' and $from is a string: every occurrence of $from is removed from $str.
     * - $to is '' and $from is an array: $from is used as replace-pairs for the native "strtr()".
     * - otherwise: $from and $to are split into characters, the longer list is cut to the
     *   length of the shorter one and each character of $from is mapped to the character of $to
     *   at the same position.
     *
     * @see http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from.</p>
     * @param string|string[] $to   [optional] <p>The string being translated to.</p>
     *
     * @throws \InvalidArgumentException if the character lists can not be combined
     *
     * @return string
     *                This function returns a copy of str, translating all occurrences of each character in from to the
     *                corresponding character in to
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        if ($from === $to) {
            return $str;
        }

        if ($to === '') {
            if (\is_string($from)) {
                return \str_replace($from, '', $str);
            }

            return \strtr($str, $from);
        }

        $fromChars = self::str_split($from);
        $toChars = self::str_split($to);
        $countFrom = \count($fromChars);
        $countTo = \count($toChars);

        // both lists must have the same size, so cut the longer one
        if ($countFrom > $countTo) {
            $fromChars = \array_slice($fromChars, 0, $countTo);
        } elseif ($countFrom < $countTo) {
            $toChars = \array_slice($toChars, 0, $countFrom);
        }

        // keep the result in its own variable, so that the error message
        // below can still show the values passed in by the caller
        $pairs = \array_combine($fromChars, $toChars);
        /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
        if ($pairs === false) {
            throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from, true) . ' | to: ' . \print_r($to, true) . ')');
        }

        return \strtr($str, $pairs);
    }
9998
9999
    /**
     * Return the width of a string.
     *
     * Uses "mb_strwidth()" if mbstring is available. Otherwise every character matched by the
     * character class below counts as width 2 and every remaining character as width 1.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return int
     */
    public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
    {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            $str = self::clean($str);
        }

        $isUtf8 = $encoding === 'UTF-8';

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $isUtf8 ? \mb_strwidth($str) : \mb_strwidth($str, $encoding);
        }

        //
        // fallback via vanilla php
        //

        if (!$isUtf8) {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // remove all "wide" characters and remember how many there were
        $wideCount = 0;
        $narrowOnly = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wideCount);

        // wide characters count twice, everything else once
        return ($wideCount * 2) + (int) self::strlen($narrowOnly, 'UTF-8');
    }
10049
10050
    /**
     * Get part of a string.
     *
     * The implementation tries, in this order: mbstring, native "substr()" for CP850 / ASCII,
     * intl ("grapheme_substr()"), iconv ("iconv_substr()"), native "substr()" for pure ASCII
     * input and finally a character-array based fallback.
     *
     * @see http://php.net/manual/en/function.mb-substr.php
     *
     * @param string $str       <p>The string being checked.</p>
     * @param int    $offset    <p>The first position used in str.</p>
     * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters. An empty string is returned for an empty
     *                      <i>str</i>, for a <i>length</i> of 0 and (in the fallback code) if
     *                      <i>offset</i> is not inside the string. <b>FALSE</b> is returned if the
     *                      string length can not be determined in the fallback code; the native
     *                      functions used may return <b>FALSE</b> as well.
     */
    public static function substr(
        string $str,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    return \mb_substr($str, $offset);
                }

                return \mb_substr($str, $offset, $length);
            }

            // do not call self::substr() here: the same arguments would take
            // the same path again and recurse without end
            return \mb_substr($str, $offset, $length, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            if ($length === null) {
                return \substr($str, $offset);
            }

            return \substr($str, $offset, $length);
        }

        // otherwise we need the string-length
        $str_length = 0;
        if ($offset || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // empty string ("$length" can not be 0 at this point, see the first check)
        if ($offset === $str_length && $length === null) {
            return '';
        }

        // impossible
        if ($offset && $offset > $str_length) {
            return '';
        }

        if ($length === null) {
            $length = (int) $str_length;
        } else {
            $length = (int) $length;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_substr($str, $offset, $length);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $returnTmp = \iconv_substr($str, $offset, $length);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split to array, and remove invalid characters
        $array = self::str_split($str);

        // extract relevant part, and join to make string again
        return \implode('', \array_slice($array, $offset, $length));
    }
10210
10211
    /**
     * Binary safe comparison of two strings from an offset, up to length characters.
     *
     * If an offset or a length is given, $str1 is reduced to that window first and
     * $str2 is cut to the (character) length of the reduced $str1 before comparing.
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);
                // measure "$str1" in the same encoding that is used to cut "$str2",
                // otherwise the character count is wrong for non UTF-8 input
                $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1, $encoding), $encoding);
            }
        }

        if ($case_insensitivity === true) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
10264
10265
    /**
     * Count the number of substring occurrences.
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string $haystack  <p>The string to search in.</p>
     * @param string $needle    <p>The substring to search for.</p>
     * @param int    $offset    [optional] <p>The offset where to start counting.</p>
     * @param int    $length    [optional] <p>
     *                          The maximum length after the specified offset to search for the
     *                          substring. A length of 0 always results in 0. If no length is given
     *                          but an offset is, the length of the haystack is used.
     *                          </p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   <p>The number of occurrences, or <strong>false</strong> if haystack or needle
     *                   is an empty string or if the haystack length can not be determined.</p>
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // strip invalid byte sequences from both inputs before counting
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        $isUtf8 = $encoding === 'UTF-8';

        // reduce the haystack to the requested window first
        if ($offset || $length > 0) {
            if ($length === null) {
                $haystackLength = self::strlen($haystack, $encoding);
                if ($haystackLength === false) {
                    return false;
                }
                $length = (int) $haystackLength;
            }

            $haystack = $isUtf8
                ? (string) \mb_substr($haystack, $offset, $length)
                : (string) \mb_substr($haystack, $offset, $length, $encoding);
        }

        if (!$isUtf8 && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            return $isUtf8
                ? \mb_substr_count($haystack, $needle)
                : \mb_substr_count($haystack, $needle, $encoding);
        }

        // without mbstring: count the regex matches of the quoted needle
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
10346
10347
    /**
10348
     * Count the number of substring occurrences.
10349
     *
10350
     * @param string $haystack <p>
10351
     *                         The string being checked.
10352
     *                         </p>
10353
     * @param string $needle   <p>
10354
     *                         The string being found.
10355
     *                         </p>
10356
     * @param int    $offset   [optional] <p>
10357
     *                         The offset where to start counting
10358
     *                         </p>
10359
     * @param int    $length   [optional] <p>
10360
     *                         The maximum length after the specified offset to search for the
10361
     *                         substring. It outputs a warning if the offset plus the length is
10362
     *                         greater than the haystack length.
10363
     *                         </p>
10364
     *
10365
     * @return false|int the number of times the
10366
     *                   needle substring occurs in the
10367
     *                   haystack string
10368
     */
10369
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($needle === '' || $haystack === '') {
            return 0;
        }

        $overloaded = self::$SUPPORT['mbstring_func_overload'] === true;

        if ($overloaded === false) {
            // plain PHP string functions: let "substr_count()" apply the window itself
            return $length === null
                ? \substr_count($haystack, $needle, $offset)
                : \substr_count($haystack, $needle, $offset, $length);
        }

        // From here on the string functions are overloaded by mbstring, so the
        // window is cut out manually and counted with an 8-bit charset.
        if ($offset || $length !== null) {
            if ($length === null) {
                $fullLength = self::strlen($haystack);
                if ($fullLength === false) {
                    return false;
                }
                $length = (int) $fullLength;
            }

            $nonPositiveWindow = $length !== 0 && $offset !== 0 && ($length + $offset) <= 0;
            if (
                $nonPositiveWindow
                &&
                Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
            ) {
                return false;
            }

            $slice = \substr($haystack, $offset, $length);
            $haystack = $slice === false ? '' : (string) $slice;
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
    }
10424
10425
    /**
10426
     * Returns the number of occurrences of $substring in the given string.
10427
     * By default, the comparison is case-sensitive, but can be made insensitive
10428
     * by setting $caseSensitive to false.
10429
     *
10430
     * @param string $str           <p>The input string.</p>
10431
     * @param string $substring     <p>The substring to search for.</p>
10432
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
10433
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
10434
     *
10435
     * @return int
10436
     */
10437 15
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $caseSensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($substring === '' || $str === '') {
            return 0;
        }

        // remember the caller's choice before the encoding name gets normalized
        $isUtf8 = $encoding === 'UTF-8';
        if ($isUtf8 === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($caseSensitive) {
            $count = $isUtf8
                ? \mb_substr_count($str, $substring)
                : \mb_substr_count($str, $substring, $encoding);

            return (int) $count;
        }

        // case-insensitive + UTF-8: compare the upper-cased variants
        if ($isUtf8) {
            $upperStr = \mb_strtoupper($str);
            $upperSubstring = \mb_strtoupper($substring);

            return (int) \mb_substr_count($upperStr, $upperSubstring);
        }

        // case-insensitive + other encoding: compare the case-folded variants
        $foldedStr = self::strtocasefold($str, true, false, $encoding, null, false);
        $foldedSubstring = self::strtocasefold($substring, true, false, $encoding, null, false);

        return (int) \mb_substr_count($foldedStr, $foldedSubstring, $encoding);
    }
10470
10471
    /**
10472
     * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
10473
     *
10474
     * @param string $haystack <p>The string to search in.</p>
10475
     * @param string $needle   <p>The substring to search for.</p>
10476
     *
10477
     * @return string return the sub-string
10478
     */
10479 2
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // an empty haystack stays empty, an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_istarts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // skip as many characters as the needle is long
        $prefixLength = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefixLength);
    }
10495
10496
    /**
10497
     * Get part of a string process in bytes.
10498
     *
10499
     * @param string $str    <p>The string being checked.</p>
10500
     * @param int    $offset <p>The first position used in str.</p>
10501
     * @param int    $length [optional] <p>The maximum length of the returned string.</p>
10502
     *
10503
     * @return false|string
10504
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
10505
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
10506
     *                      characters long, <b>FALSE</b> will be returned.
10507
     */
10508
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        // No explicit length: omit the argument instead of passing a magic
        // "2147483647", which would cut off strings larger than 2 GiB on 64-bit builds.
        if ($length === null) {
            return \substr($str, $offset);
        }

        return \substr($str, $offset, $length);
    }
10527
10528
    /**
10529
     * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
10530
     *
10531
     * @param string $haystack <p>The string to search in.</p>
10532
     * @param string $needle   <p>The substring to search for.</p>
10533
     *
10534
     * @return string return the sub-string
10535
     */
10536 2
    public static function substr_iright(string $haystack, string $needle): string
    {
        // an empty haystack stays empty, an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_iends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // keep everything in front of the matched suffix
        $keepLength = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $keepLength);
    }
10552
10553
    /**
10554
     * Removes a prefix ($needle) from the start of the string ($haystack).
10555
     *
10556
     * @param string $haystack <p>The string to search in.</p>
10557
     * @param string $needle   <p>The substring to search for.</p>
10558
     *
10559
     * @return string return the sub-string
10560
     */
10561 2
    public static function substr_left(string $haystack, string $needle): string
    {
        // an empty haystack stays empty, an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_starts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // skip as many characters as the needle is long
        $prefixLength = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefixLength);
    }
10577
10578
    /**
10579
     * Replace text within a portion of a string.
10580
     *
10581
     * source: https://gist.github.com/stemar/8287074
10582
     *
10583
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
10584
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
10585
     * @param int|int[]       $offset      <p>
10586
     *                                     If start is positive, the replacing will begin at the start'th offset
10587
     *                                     into string.
10588
     *                                     <br><br>
10589
     *                                     If start is negative, the replacing will begin at the start'th character
10590
     *                                     from the end of string.
10591
     *                                     </p>
10592
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
10593
     *                                     portion of string which is to be replaced. If it is negative, it
10594
     *                                     represents the number of characters from the end of string at which to
10595
     *                                     stop replacing. If it is not given, then it will default to strlen(
10596
     *                                     string ); i.e. end the replacing at the end of string. Of course, if
10597
     *                                     length is zero then this function will have the effect of inserting
10598
     *                                     replacement into string at the given start offset.</p>
10599
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
10600
     *
10601
     * @return string|string[] The result string is returned. If string is an array then array is returned.
10602
     */
10603 10
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        //
        // array mode: normalize all arguments to lists of equal size and
        // process every string on its own
        //

        if (\is_array($str) === true) {
            $num = \count($str);

            // the replacement
            if (\is_array($replacement) === true) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset (non-integer entries are replaced by 0)
            if (\is_array($offset) === true) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$valueTmp) {
                    $valueTmp = (int) $valueTmp === $valueTmp ? $valueTmp : 0;
                }
                unset($valueTmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length (non-integer entries are replaced by $num)
            if ($length === null) {
                // NOTE(review): a missing $length becomes 0 (= pure insertion) for every
                // element here, while the scalar path below treats "null" as "up to the
                // end of the string" - confirm that this difference is intended.
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length) === true) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$valueTmpV2) {
                    $valueTmpV2 = (int) $valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
                }
                unset($valueTmpV2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call; the closure forwards $encoding, which a plain
            // callable passed to "array_map()" would silently drop
            return \array_map(
                static function ($strTmp, $replacementTmp, $offsetTmp, $lengthTmp) use ($encoding) {
                    return self::substr_replace($strTmp, $replacementTmp, $offsetTmp, $lengthTmp, $encoding);
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        //
        // scalar mode
        //

        // only the first entry of a replacement array is used for a single string
        if (\is_array($replacement) === true) {
            if (\count($replacement) > 0) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length) === true) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset) === true) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, $string_length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // a negative length counts from the end, a missing / too large one means "rest of the string"
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // part before the window + replacement + part behind the window
            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php: split both strings into characters and splice the arrays
        //

        \preg_match_all('/./us', $str, $smatches);
        \preg_match_all('/./us', $replacement, $rmatches);

        if ($length === null) {
            $lengthTmp = self::strlen($str, $encoding);
            if ($lengthTmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $lengthTmp;
        }

        \array_splice($smatches[0], $offset, $length, $rmatches[0]);

        return \implode('', $smatches[0]);
    }
10728
10729
    /**
10730
     * Removes a suffix ($needle) from the end of the string ($haystack).
10731
     *
10732
     * @param string $haystack <p>The string to search in.</p>
10733
     * @param string $needle   <p>The substring to search for.</p>
10734
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
10735
     *
10736
     * @return string return the sub-string
10737
     */
10738 2
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // an empty haystack stays empty, an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // byte-wise comparison of the tail of the haystack against the needle
        $tail = \substr($haystack, -\strlen($needle));
        if ($tail !== $needle) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            $keepLength = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keepLength);
        }

        $keepLength = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $keepLength, $encoding);
    }
10770
10771
    /**
10772
     * Returns a case swapped version of the string.
10773
     *
10774
     * @param string $str       <p>The input string.</p>
10775
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
10776
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10777
     *
10778
     * @return string each character's case swapped
10779
     */
10780 6
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        $isUtf8 = $encoding === 'UTF-8';

        if ($isUtf8) {
            $lower = \mb_strtolower($str);
            $upper = \mb_strtoupper($str);
        } else {
            $lower = self::strtolower($str, $encoding);
            $upper = self::strtoupper($str, $encoding);
        }

        // Fast path: for pure 7-bit input every character is exactly one byte in
        // all three strings, so the byte-wise XOR swaps the case correctly.
        if ($isUtf8 && \preg_match('/[\x80-\xFF]/', $str) !== 1) {
            return (string) ($lower ^ $upper ^ $str);
        }

        // The XOR trick is only valid if the three strings are aligned byte by
        // byte. That is not the case once a character and its case counterpart
        // differ in byte length, so the swap is done per character from here on.

        // encoding name that can be handed to the "mb_" functions below
        $mbEncoding = $encoding;
        if ($mbEncoding !== 'UTF-8' && $mbEncoding !== 'CP850') {
            $mbEncoding = self::normalize_encoding($mbEncoding, 'UTF-8');
        }

        // splits a string into a list of characters, "null" if that is not possible
        $split = static function (string $input) use ($mbEncoding) {
            if ($mbEncoding === 'UTF-8') {
                $parts = \preg_split('//u', $input, -1, \PREG_SPLIT_NO_EMPTY);

                return $parts === false ? null : $parts;
            }

            $parts = [];
            $partsCount = (int) \mb_strlen($input, $mbEncoding);
            for ($i = 0; $i < $partsCount; ++$i) {
                $parts[] = (string) \mb_substr($input, $i, 1, $mbEncoding);
            }

            return $parts;
        };

        $chars = $split($str);
        if ($chars === null) {
            // the input cannot be split (invalid UTF-8): keep the previous byte-wise behaviour
            return (string) ($lower ^ $upper ^ $str);
        }

        // If the converted strings have the same number of characters as the
        // input, the whole-string conversions can be reused position by position.
        // Otherwise every character is converted on its own.
        $lowerChars = $split($lower);
        $upperChars = $split($upper);
        $charsCount = \count($chars);
        $aligned = $lowerChars !== null
                   &&
                   $upperChars !== null
                   &&
                   \count($lowerChars) === $charsCount
                   &&
                   \count($upperChars) === $charsCount;

        $swapped = '';
        foreach ($chars as $index => $char) {
            if ($aligned) {
                $lowerChar = $lowerChars[$index];
                $upperChar = $upperChars[$index];
            } elseif ($isUtf8) {
                $lowerChar = \mb_strtolower($char);
                $upperChar = \mb_strtoupper($char);
            } else {
                $lowerChar = self::strtolower($char, $encoding);
                $upperChar = self::strtoupper($char, $encoding);
            }

            // lower-case (or caseless) character -> upper variant, everything else -> lower variant
            $swapped .= $char === $lowerChar ? $upperChar : $lowerChar;
        }

        return $swapped;
    }
10798
10799
    /**
10800
     * Checks whether symfony-polyfills are used.
10801
     *
10802
     * @return bool
10803
     *              <strong>true</strong> if in use, <strong>false</strong> otherwise
10804
     */
10805
    public static function symfony_polyfill_used(): bool
    {
        // a polyfill is assumed whenever the functions exist although the extension itself is not loaded
        $mbstringPolyfilled = \extension_loaded('mbstring') === false && \function_exists('mb_strlen');
        $iconvPolyfilled = \extension_loaded('iconv') === false && \function_exists('iconv');

        return $mbstringPolyfilled || $iconvPolyfilled;
    }
10822
10823
    /**
10824
     * @param string $str
10825
     * @param int    $tabLength
10826
     *
10827
     * @return string
10828
     */
10829 6
    public static function tabs_to_spaces(string $str, int $tabLength = 4): string
    {
        // every tab character is replaced by $tabLength space characters
        $indent = \str_repeat(' ', $tabLength);

        return \str_replace("\t", $indent, $str);
    }
10841
10842
    /**
10843
     * Converts the first character of each word in the string to uppercase
10844
     * and all other chars to lowercase.
10845
     *
10846
     * @param string      $str                   <p>The input string.</p>
10847
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
10848
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
10849
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
10850
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
10851
     *
10852
     * @return string string with all characters of $str being title-cased
10853
     */
10854 5
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // language specific rules or length preservation are delegated to "str_titleize()"
        if ($lang !== null || $tryToKeepStringLength === true) {
            return self::str_titleize(
                $str,
                null,
                $encoding,
                false,
                $lang,
                $tryToKeepStringLength,
                false
            );
        }

        // default case: let mbstring do the conversion
        if ($encoding === 'UTF-8') {
            return \mb_convert_case($str, \MB_CASE_TITLE);
        }

        $normalizedEncoding = self::normalize_encoding($encoding, 'UTF-8');

        return \mb_convert_case($str, \MB_CASE_TITLE, $normalizedEncoding);
    }
10887
10888
    /**
10889
     * alias for "UTF8::to_ascii()"
10890
     *
10891
     * @param string $str
10892
     * @param string $subst_chr
10893
     * @param bool   $strict
10894
     *
10895
     * @return string
10896
     *
10897
     * @see UTF8::to_ascii()
10898
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
10899
     */
10900 7
    public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
    {
        // deprecated alias: all arguments are forwarded unchanged to "to_ascii()"
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
10904
10905
    /**
10906
     * alias for "UTF8::to_iso8859()"
10907
     *
10908
     * @param string|string[] $str
10909
     *
10910
     * @return string|string[]
10911
     *
10912
     * @see UTF8::to_iso8859()
10913
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
10914
     */
10915 2
    public static function toIso8859($str)
    {
        // deprecated alias: the argument is forwarded unchanged to "to_iso8859()"
        $converted = self::to_iso8859($str);

        return $converted;
    }
10919
10920
    /**
10921
     * alias for "UTF8::to_latin1()"
10922
     *
10923
     * @param string|string[] $str
10924
     *
10925
     * @return string|string[]
10926
     *
10927
     * @see UTF8::to_latin1()
10928
     * @deprecated <p>please use "UTF8::to_latin1()"</p>
10929
     */
10930 2
    public static function toLatin1($str)
    {
        // deprecated alias: the argument is forwarded unchanged to "to_latin1()"
        $converted = self::to_latin1($str);

        return $converted;
    }
10934
10935
    /**
10936
     * alias for "UTF8::to_utf8()"
10937
     *
10938
     * @param string|string[] $str
10939
     *
10940
     * @return string|string[]
10941
     *
10942
     * @see UTF8::to_utf8()
10943
     * @deprecated <p>please use "UTF8::to_utf8()"</p>
10944
     */
10945 2
    public static function toUTF8($str)
    {
        // deprecated alias: the argument is forwarded unchanged to "to_utf8()"
        $converted = self::to_utf8($str);

        return $converted;
    }
10949
10950
    /**
10951
     * Convert a string into ASCII.
10952
     *
10953
     * @param string $str     <p>The input string.</p>
10954
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
10955
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
10956
     *                        performance</p>
10957
     *
10958
     * @return string
10959
     */
10960 37
    public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
    {
        // the actual work is done by the ASCII helper class
        $ascii = ASCII::to_transliterate($str, $unknown, $strict);

        return $ascii;
    }
10964
10965
    /**
10966
     * @param mixed $str
10967
     *
10968
     * @return bool
10969
     */
10970 19
    public static function to_boolean($str): bool
    {
        // init
        $str = (string) $str;

        if ($str === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $map = [
            'true'  => true,
            '1'     => true,
            'on'    => true,
            'yes'   => true,
            'false' => false,
            '0'     => false,
            'off'   => false,
            'no'    => false,
        ];

        // exact hit, no normalization needed
        if (isset($map[$str])) {
            return $map[$str];
        }

        // Normalize case AND surrounding whitespace before the second lookup,
        // otherwise values like " false " or "no\n" miss the map and end up as
        // "true" in the final fallback.
        $key = \strtolower(\trim($str));

        // whitespace only
        if ($key === '') {
            return false;
        }

        if (isset($map[$key])) {
            return $map[$key];
        }

        // Checked on the trimmed value, because "is_numeric()" treats trailing
        // whitespace differently depending on the PHP version.
        if (\is_numeric($key)) {
            return ((float) $key + 0) > 0;
        }

        // any other non-empty value counts as "true"
        return true;
    }
11006
11007
    /**
11008
     * Convert given string to safe filename (and keep string case).
11009
     *
11010
     * @param string $str
11011
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
11012
     *                                  simply replaced with hyphen.
11013
     * @param string $fallback_char
11014
     *
11015
     * @return string
11016
     */
11017 1
    public static function to_filename(
        string $str,
        bool $use_transliterate = false,
        string $fallback_char = '-'
    ): string {
        // the actual work is done by the ASCII helper class
        $filename = ASCII::to_filename($str, $use_transliterate, $fallback_char);

        return $filename;
    }
11028
11029
    /**
11030
     * Convert a string into "ISO-8859"-encoding (Latin-1).
11031
     *
11032
     * @param string|string[] $str
11033
     *
11034
     * @return string|string[]
11035
     */
11036 8
    public static function to_iso8859($str)
    {
        // arrays are converted element by element (recursively), the keys are kept
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        $str = (string) $str;

        // nothing to decode
        if ($str === '') {
            return '';
        }

        return self::utf8_decode($str);
    }
11053
11054
    /**
11055
     * alias for "UTF8::to_iso8859()"
11056
     *
11057
     * @param string|string[] $str
11058
     *
11059
     * @return string|string[]
11060
     *
11061
     * @see UTF8::to_iso8859()
11062
     */
11063 2
    public static function to_latin1($str)
    {
        // alias: the argument is forwarded unchanged to "to_iso8859()"
        $converted = self::to_iso8859($str);

        return $converted;
    }
11067
11068
    /**
11069
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
11070
     *
11071
     * <ul>
11072
     * <li>It decodes UTF-8 codepoints and unicode escape sequences.</li>
11073
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
11074
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
11075
     * case.</li>
11076
     * </ul>
11077
     *
11078
     * @param string|string[] $str                    <p>Any string or array.</p>
11079
     * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
11080
     *
11081
     * @return string|string[] the UTF-8 encoded string
11082
     */
11083 41
    public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
11084
    {
11085 41
        if (\is_array($str) === true) {
11086 4
            foreach ($str as $k => &$v) {
11087 4
                $v = self::to_utf8($v, $decodeHtmlEntityToUtf8);
11088
            }
11089
11090 4
            return $str;
11091
        }
11092
11093 41
        $str = (string) $str;
11094 41
        if ($str === '') {
11095 6
            return $str;
11096
        }
11097
11098 41
        $max = \strlen($str);
11099 41
        $buf = '';
11100
11101 41
        for ($i = 0; $i < $max; ++$i) {
11102 41
            $c1 = $str[$i];
11103
11104 41
            if ($c1 >= "\xC0") { // should be converted to UTF8, if it's not UTF8 already
11105
11106 37
                if ($c1 <= "\xDF") { // looks like 2 bytes UTF8
11107
11108 34
                    $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
11109
11110 34
                    if ($c2 >= "\x80" && $c2 <= "\xBF") { // yeah, almost sure it's UTF8 already
11111 20
                        $buf .= $c1 . $c2;
11112 20
                        ++$i;
11113
                    } else { // not valid UTF8 - convert it
11114 34
                        $buf .= self::to_utf8_convert_helper($c1);
11115
                    }
11116 34
                } elseif ($c1 >= "\xE0" && $c1 <= "\xEF") { // looks like 3 bytes UTF8
11117
11118 33
                    $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
11119 33
                    $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];
11120
11121 33
                    if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF") { // yeah, almost sure it's UTF8 already
11122 15
                        $buf .= $c1 . $c2 . $c3;
11123 15
                        $i += 2;
11124
                    } else { // not valid UTF8 - convert it
11125 33
                        $buf .= self::to_utf8_convert_helper($c1);
11126
                    }
11127 26
                } elseif ($c1 >= "\xF0" && $c1 <= "\xF7") { // looks like 4 bytes UTF8
11128
11129 26
                    $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
11130 26
                    $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];
11131 26
                    $c4 = $i + 3 >= $max ? "\x00" : $str[$i + 3];
11132
11133 26
                    if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF" && $c4 >= "\x80" && $c4 <= "\xBF") { // yeah, almost sure it's UTF8 already
11134 8
                        $buf .= $c1 . $c2 . $c3 . $c4;
11135 8
                        $i += 3;
11136
                    } else { // not valid UTF8 - convert it
11137 26
                        $buf .= self::to_utf8_convert_helper($c1);
11138
                    }
11139
                } else { // doesn't look like UTF8, but should be converted
11140
11141 37
                    $buf .= self::to_utf8_convert_helper($c1);
11142
                }
11143 38
            } elseif (($c1 & "\xC0") === "\x80") { // needs conversion
11144
11145 4
                $buf .= self::to_utf8_convert_helper($c1);
11146
            } else { // it doesn't need conversion
11147
11148 38
                $buf .= $c1;
11149
            }
11150
        }
11151
11152
        // decode unicode escape sequences + unicode surrogate pairs
11153 41
        $buf = \preg_replace_callback(
11154 41
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
11155
            /**
11156
             * @param array $matches
11157
             *
11158
             * @return string
11159
             */
11160
            static function (array $matches): string {
11161 12
                if (isset($matches[3])) {
11162 12
                    $cp = (int) \hexdec($matches[3]);
11163
                } else {
11164
                    // http://unicode.org/faq/utf_bom.html#utf16-4
11165
                    $cp = ((int) \hexdec($matches[1]) << 10)
11166
                          + (int) \hexdec($matches[2])
11167
                          + 0x10000
11168
                          - (0xD800 << 10)
11169
                          - 0xDC00;
11170
                }
11171
11172
                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
11173
                //
11174
                // php_utf32_utf8(unsigned char *buf, unsigned k)
11175
11176 12
                if ($cp < 0x80) {
11177 8
                    return (string) self::chr($cp);
11178
                }
11179
11180 9
                if ($cp < 0xA0) {
11181
                    /** @noinspection UnnecessaryCastingInspection */
11182
                    return (string) self::chr(0xC0 | $cp >> 6) . (string) self::chr(0x80 | $cp & 0x3F);
11183
                }
11184
11185 9
                return self::decimal_to_chr($cp);
11186 41
            },
11187 41
            $buf
11188
        );
11189
11190 41
        if ($buf === null) {
11191
            return '';
11192
        }
11193
11194
        // decode UTF-8 codepoints
11195 41
        if ($decodeHtmlEntityToUtf8 === true) {
11196 2
            $buf = self::html_entity_decode($buf);
11197
        }
11198
11199 41
        return $buf;
11200
    }
11201
11202
    /**
     * Strip whitespace (or a custom set of characters) from both ends of a UTF-8 string.
     *
     * INFO: This is slower than the native "trim()".
     *
     * The native function could only be used for pure 7-bit input, but checking
     * for ASCII first costs more time than it would save.
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars [optional] <p>Characters to strip; null or an empty string strips whitespace.</p>
     *
     * @return string the trimmed string
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Compare explicitly instead of relying on truthiness: the char list "0"
        // is falsy in PHP and would otherwise silently fall back to whitespace.
        if ($chars !== null && $chars !== '') {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
11235
11236
    /**
     * Upper-cases the first character of a string and leaves the rest untouched.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // The plain mbstring call is only taken when neither language-specific
        // nor length-preserving upper-casing was requested.
        $plainMb = $lang === null && $tryToKeepStringLength === false;

        if ($encoding === 'UTF-8') {
            $tail = (string) \mb_substr($str, 1);
            $head = (string) \mb_substr($str, 0, 1);

            if ($plainMb === true) {
                return \mb_strtoupper($head) . $tail;
            }

            return self::strtoupper($head, $encoding, false, $lang, $tryToKeepStringLength) . $tail;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $tail = (string) self::substr($str, 1, null, $encoding);

        if ($plainMb === true) {
            $head = (string) \mb_substr($str, 0, 1, $encoding);

            return \mb_strtoupper($head, $encoding) . $tail;
        }

        $head = (string) self::substr($str, 0, 1, $encoding);

        return self::strtoupper($head, $encoding, false, $lang, $tryToKeepStringLength) . $tail;
    }
11305
11306
    /**
     * Alias of "UTF8::ucfirst()": upper-cases the first character of the string.
     *
     * Only the first three arguments of "UTF8::ucfirst()" are forwarded.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset.</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     *
     * @see UTF8::ucfirst()
     */
    public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        $result = self::ucfirst($str, $encoding, $cleanUtf8);

        return $result;
    }
11321
11322
    /**
     * Upper-cases the first character of every word in the string.
     *
     * INFO: mb_convert_case($str, MB_CASE_TITLE) is not used, because MB_CASE_TITLE
     * also lower-cases all other letters of each word.
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Words that are left untouched (compared strictly).</p>
     * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ): string {
        // Explicit comparison: "0" is a valid, non-empty input and must not be
        // swallowed by a truthiness check.
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // The native function is only an option when neither a char list nor any
        // non-empty exception was given ("0" counts as given).
        $usePhpDefaultFunctions = $charlist === '' && \implode('', $exceptions) === '';

        if (
            $usePhpDefaultFunctions === true
            &&
            ASCII::is_ascii($str) === true
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        foreach ($words as &$word) {
            if (!$word) {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $word = self::ucfirst($word, $encoding);
            }
        }
        // break the reference so later use of $word cannot alter the last element
        unset($word);

        return \implode('', $words);
    }
11383
11384
    /**
     * Decodes url-encoded input, html-entities and broken win1252 chars, by default repeatedly.
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // Without any of these markers there is nothing to url- or entity-decode.
        $needsDecoding = false;
        foreach (['&', '%', '+', '\u'] as $marker) {
            if (\strpos($str, $marker) !== false) {
                $needsDecoding = true;

                break;
            }
        }

        if ($needsDecoding === false) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        do {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $decoded = self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            );
            $str = self::fix_simple_utf8(\urldecode($decoded));

            // repeat until a pass no longer changes the string (multi-decode only)
        } while ($multi_decode === true && $previous !== $str);

        return $str;
    }
11441
11442
    /**
     * Returns a lookup table from url-encoded win1252 bytes ("%20" - "%FF") to their UTF-8 characters.
     *
     * "%80" maps to the Euro sign, which is what byte 0x80 means in Windows-1252.
     *
     * NOTE(review): the values for %7F, %81, %8D, %8F, %90, %9D, %A0 and %AD appear as
     * empty strings here; they may originally hold non-printable characters
     * (DEL, C1 controls, NBSP, soft hyphen) - confirm against the raw file.
     *
     * @return string[]
     *
     * @deprecated <p>please use the "UTF8::urldecode()" function to decode a string</p>
     */
    public static function urldecode_fix_win1252_chars(): array
    {
        return [
            '%20' => ' ',
            '%21' => '!',
            '%22' => '"',
            '%23' => '#',
            '%24' => '$',
            '%25' => '%',
            '%26' => '&',
            '%27' => "'",
            '%28' => '(',
            '%29' => ')',
            '%2A' => '*',
            '%2B' => '+',
            '%2C' => ',',
            '%2D' => '-',
            '%2E' => '.',
            '%2F' => '/',
            '%30' => '0',
            '%31' => '1',
            '%32' => '2',
            '%33' => '3',
            '%34' => '4',
            '%35' => '5',
            '%36' => '6',
            '%37' => '7',
            '%38' => '8',
            '%39' => '9',
            '%3A' => ':',
            '%3B' => ';',
            '%3C' => '<',
            '%3D' => '=',
            '%3E' => '>',
            '%3F' => '?',
            '%40' => '@',
            '%41' => 'A',
            '%42' => 'B',
            '%43' => 'C',
            '%44' => 'D',
            '%45' => 'E',
            '%46' => 'F',
            '%47' => 'G',
            '%48' => 'H',
            '%49' => 'I',
            '%4A' => 'J',
            '%4B' => 'K',
            '%4C' => 'L',
            '%4D' => 'M',
            '%4E' => 'N',
            '%4F' => 'O',
            '%50' => 'P',
            '%51' => 'Q',
            '%52' => 'R',
            '%53' => 'S',
            '%54' => 'T',
            '%55' => 'U',
            '%56' => 'V',
            '%57' => 'W',
            '%58' => 'X',
            '%59' => 'Y',
            '%5A' => 'Z',
            '%5B' => '[',
            '%5C' => '\\',
            '%5D' => ']',
            '%5E' => '^',
            '%5F' => '_',
            '%60' => '`',
            '%61' => 'a',
            '%62' => 'b',
            '%63' => 'c',
            '%64' => 'd',
            '%65' => 'e',
            '%66' => 'f',
            '%67' => 'g',
            '%68' => 'h',
            '%69' => 'i',
            '%6A' => 'j',
            '%6B' => 'k',
            '%6C' => 'l',
            '%6D' => 'm',
            '%6E' => 'n',
            '%6F' => 'o',
            '%70' => 'p',
            '%71' => 'q',
            '%72' => 'r',
            '%73' => 's',
            '%74' => 't',
            '%75' => 'u',
            '%76' => 'v',
            '%77' => 'w',
            '%78' => 'x',
            '%79' => 'y',
            '%7A' => 'z',
            '%7B' => '{',
            '%7C' => '|',
            '%7D' => '}',
            '%7E' => '~',
            '%7F' => '',
            // 0x80 is the Euro sign in Windows-1252 (was mapped to a backtick)
            '%80' => '€',
            '%81' => '',
            '%82' => '‚',
            '%83' => 'ƒ',
            '%84' => '„',
            '%85' => '…',
            '%86' => '†',
            '%87' => '‡',
            '%88' => 'ˆ',
            '%89' => '‰',
            '%8A' => 'Š',
            '%8B' => '‹',
            '%8C' => 'Œ',
            '%8D' => '',
            '%8E' => 'Ž',
            '%8F' => '',
            '%90' => '',
            '%91' => '‘',
            '%92' => '’',
            '%93' => '“',
            '%94' => '”',
            '%95' => '•',
            '%96' => '–',
            '%97' => '—',
            '%98' => '˜',
            '%99' => '™',
            '%9A' => 'š',
            '%9B' => '›',
            '%9C' => 'œ',
            '%9D' => '',
            '%9E' => 'ž',
            '%9F' => 'Ÿ',
            '%A0' => '',
            '%A1' => '¡',
            '%A2' => '¢',
            '%A3' => '£',
            '%A4' => '¤',
            '%A5' => '¥',
            '%A6' => '¦',
            '%A7' => '§',
            '%A8' => '¨',
            '%A9' => '©',
            '%AA' => 'ª',
            '%AB' => '«',
            '%AC' => '¬',
            '%AD' => '',
            '%AE' => '®',
            '%AF' => '¯',
            '%B0' => '°',
            '%B1' => '±',
            '%B2' => '²',
            '%B3' => '³',
            '%B4' => '´',
            '%B5' => 'µ',
            '%B6' => '¶',
            '%B7' => '·',
            '%B8' => '¸',
            '%B9' => '¹',
            '%BA' => 'º',
            '%BB' => '»',
            '%BC' => '¼',
            '%BD' => '½',
            '%BE' => '¾',
            '%BF' => '¿',
            '%C0' => 'À',
            '%C1' => 'Á',
            '%C2' => 'Â',
            '%C3' => 'Ã',
            '%C4' => 'Ä',
            '%C5' => 'Å',
            '%C6' => 'Æ',
            '%C7' => 'Ç',
            '%C8' => 'È',
            '%C9' => 'É',
            '%CA' => 'Ê',
            '%CB' => 'Ë',
            '%CC' => 'Ì',
            '%CD' => 'Í',
            '%CE' => 'Î',
            '%CF' => 'Ï',
            '%D0' => 'Ð',
            '%D1' => 'Ñ',
            '%D2' => 'Ò',
            '%D3' => 'Ó',
            '%D4' => 'Ô',
            '%D5' => 'Õ',
            '%D6' => 'Ö',
            '%D7' => '×',
            '%D8' => 'Ø',
            '%D9' => 'Ù',
            '%DA' => 'Ú',
            '%DB' => 'Û',
            '%DC' => 'Ü',
            '%DD' => 'Ý',
            '%DE' => 'Þ',
            '%DF' => 'ß',
            '%E0' => 'à',
            '%E1' => 'á',
            '%E2' => 'â',
            '%E3' => 'ã',
            '%E4' => 'ä',
            '%E5' => 'å',
            '%E6' => 'æ',
            '%E7' => 'ç',
            '%E8' => 'è',
            '%E9' => 'é',
            '%EA' => 'ê',
            '%EB' => 'ë',
            '%EC' => 'ì',
            '%ED' => 'í',
            '%EE' => 'î',
            '%EF' => 'ï',
            '%F0' => 'ð',
            '%F1' => 'ñ',
            '%F2' => 'ò',
            '%F3' => 'ó',
            '%F4' => 'ô',
            '%F5' => 'õ',
            '%F6' => 'ö',
            '%F7' => '÷',
            '%F8' => 'ø',
            '%F9' => 'ù',
            '%FA' => 'ú',
            '%FB' => 'û',
            '%FC' => 'ü',
            '%FD' => 'ý',
            '%FE' => 'þ',
            '%FF' => 'ÿ',
        ];
    }
11678
11679
    /**
     * Decodes an UTF-8 string to ISO-8859-1.
     *
     * The string is rewritten in place: a read index walks the UTF-8 bytes while a
     * write index (never ahead of the read index) stores one output byte per
     * character. Characters that do not fit into one byte become "?".
     *
     * @param string $str           <p>The input string.</p>
     * @param bool   $keepUtf8Chars [optional] <p>Return the untouched input when the decoded result
     *                              is not shorter than it (measured with "UTF8::strlen()").</p>
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // save for later comparison
        $str_backup = $str;
        $len = \strlen($str);

        // lazily load the lookup tables
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $noCharFound = '?';
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            // the high nibble of the lead byte tells the sequence length
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    // 2-byte sequence
                    if ($i + 1 >= $len) {
                        // truncated input: the continuation byte is missing, so do
                        // not read past the end of the string
                        $str[$j] = $noCharFound;

                        break;
                    }

                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // 4-byte sequence: skip one extra byte, then continue like 3 bytes
                    ++$i;

                // no break

                case "\xE0":
                    // 3-byte (or 4-byte) sequence: never representable in one byte
                    $str[$j] = $noCharFound;
                    $i += 2;

                    break;

                default:
                    // single byte: copy as it is
                    $str[$j] = $str[$i];
            }
        }

        // only the first $j bytes hold decoded output
        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keepUtf8Chars === true
            &&
            self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
11748
11749
    /**
     * Converts an ISO-8859-1 string to UTF-8 via the global "utf8_encode()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the encoded string, or an empty string if the conversion reports failure
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $encoded = \utf8_encode($str);

        // a polyfill of "utf8_encode()" may return false
        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        return $encoded === false ? '' : $encoded;
    }
11773
11774
    /**
     * Thin wrapper that forwards the string to "UTF8::fix_simple_utf8()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *
     * @deprecated <p>please use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
11787
11788
    /**
     * Gives access to the class-level table of utf8 whitespace characters.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @return string[]
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
     *                  as defined in above URL
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
11801
11802
    /**
     * Limit the number of words in a string.
     *
     * If the string holds more than $limit words, it is cut after the last allowed
     * word, trailing whitespace is removed and $strAddOn is appended. Otherwise the
     * string is returned unchanged.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $limit    <p>The limit of words as integer; values below 1 give an empty string.</p>
     * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
     *
     * @return string
     */
    public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
    {
        if ($str === '' || $limit < 1) {
            return '';
        }

        \preg_match('/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u', $str, $matches);

        if (
            !isset($matches[0])
            ||
            \mb_strlen($str) === (int) \mb_strlen($matches[0])
        ) {
            return $str;
        }

        // With the "u" modifier "\s" also matches Unicode whitespace, which a plain
        // "rtrim()" would leave in front of the add-on. "\x00" is listed as well,
        // because the default "rtrim()" used before also stripped NUL bytes.
        $head = \preg_replace('/[\\s\\x00]+\\z/u', '', $matches[0]);
        if ($head === null) {
            $head = \rtrim($matches[0]);
        }

        return $head . $strAddOn;
    }
11829
11830
    /**
     * Wraps a string to a given number of characters.
     *
     * A mask with one ASCII byte per character is built ("?" for a character,
     * " " for a space, "#" for an existing break), so that the native "wordwrap()"
     * counts characters. The break positions of the wrapped mask are then applied
     * to the real characters.
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @return string
     *                <p>The given string wrapped at the specified column; an empty string if
     *                $str or $break is empty.</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($str === '' || $break === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }

        // $chars: all characters of the input, every existing break as one element
        // $mask:  the one-byte-per-element stand-in for $chars
        $chars = [];
        $mask = '';
        foreach ($segments as $segmentIndex => $segment) {
            if ($segmentIndex) {
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $wrapped = '';
        $charPos = 0;
        $breakPos = -1;
        $maskPos = -1;
        $mask = \wordwrap($mask, $width, '#', $cut);

        $maskLength = \mb_strlen($mask);
        while (($breakPos = \mb_strpos($mask, '#', $breakPos + 1)) !== false) {
            // move everything in front of the break into the result
            for (++$maskPos; $maskPos < $breakPos; ++$maskPos) {
                $wrapped .= $chars[$charPos];
                unset($chars[$charPos]);
                ++$charPos;

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($maskPos > $maskLength) {
                    break 2;
                }
            }

            // a break that replaced a space or an existing break swallows that element
            if (
                $break === $chars[$charPos]
                ||
                $chars[$charPos] === ' '
            ) {
                unset($chars[$charPos]);
                ++$charPos;
            }

            $wrapped .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($breakPos > $maskLength) {
                break;
            }
        }

        // whatever is left belongs to the last line
        return $wrapped . \implode('', $chars);
    }
11916
11917
    /**
     * Splits the string by "$delimiter" (newlines by default), wraps every part on its own
     * with "UTF8::wordwrap()" and joins the parts again.
     *
     * @param string      $str           <p>The input string.</p>
     * @param int         $width         [optional] <p>The column width.</p>
     * @param string      $break         [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool        $cut           [optional] <p>
     *                                   If the cut is set to true, the string is
     *                                   always wrapped at or before the specified width. So if you have
     *                                   a word that is larger than the given width, it is broken apart.
     *                                   </p>
     * @param bool        $addFinalBreak [optional] <p>
     *                                   If this flag is true, then the method will add a $break at the end
     *                                   of the result string.
     *                                   </p>
     * @param string|null $delimiter     [optional] <p>
     *                                   You can change the default behavior, where we split the string by newline.
     *                                   The parts are joined with it again ("\n" if it is null).
     *                                   </p>
     *
     * @return string
     */
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $addFinalBreak = true,
        string $delimiter = null
    ): string {
        $parts = $delimiter === null
            ? \preg_split('/\\r\\n|\\r|\\n/', $str)
            : \explode($delimiter, $str);

        $wrappedParts = [];
        if ($parts !== false) {
            foreach ($parts as $part) {
                $wrappedParts[] = self::wordwrap($part, $width, $break, $cut);
            }
        }

        $glue = $delimiter ?? "\n";
        $suffix = $addFinalBreak ? $break : '';

        return \implode($glue, $wrappedParts) . $suffix;
    }
11968
11969
    /**
     * Gives access to the class-level list of Unicode White Space characters.
     *
     * @return string[] an array with numeric code point as key and White Space Character as value
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
11978
11979
    /**
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
     *
     * Rejected are: bytes that cannot start a sequence, non-shortest-form
     * (overlong) encodings, 5- and 6-octet sequences, surrogate code points,
     * code points above U+10FFFF, and multi-octet sequences that are cut off
     * (either by an unexpected byte or by the end of the string).
     *
     * @see http://hsivonen.iki.fi/php-utf8/
     *
     * @param string $str    <p>The string to be checked.</p>
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     */
    private static function is_utf8_string(string $str, bool $strict = false): bool
    {
        if ($str === '') {
            return true;
        }

        if ($strict === true) {
            $isBinary = self::is_binary($str, true);

            if ($isBinary && self::is_utf16($str, false) !== false) {
                return false;
            }

            if ($isBinary && self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        // The "/u" shortcut can only work when PCRE understands UTF-8, so it
        // must be taken when support IS reported (the previous "!== true"
        // guard ran it exactly when "/u" is unavailable).
        if (self::pcre_utf8_support() === true) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^./us', $str) === 1;
        }

        $mState = 0; // cached expected number of octets after the current octet
        // until the beginning of the next UTF8 character sequence
        $mUcs4 = 0; // cached Unicode character
        $mBytes = 1; // cached expected number of octets in the current sequence

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $len = \strlen($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $len; ++$i) {
            $in = self::$ORD[$str[$i]];

            if ($mState === 0) {
                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if ((0x80 & $in) === 0) {
                    // US-ASCII, pass straight through.
                    $mBytes = 1;
                } elseif ((0xE0 & $in) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $mUcs4 = ($in & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;
                } elseif ((0xF0 & $in) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $mUcs4 = ($in & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;
                } elseif ((0xF8 & $in) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $mUcs4 = ($in & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;
                } elseif ((0xFC & $in) === 0xF8) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = ($in & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;
                } elseif ((0xFE & $in) === 0xFC) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = ($in & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;
                } else {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }
            } elseif ((0xC0 & $in) === 0x80) {
                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence

                // Legal continuation.
                $shift = ($mState - 1) * 6;
                $mUcs4 |= ($in & 0x0000003F) << $shift;

                // End of the multi-octet sequence: mUcs4 now contains the final
                // Unicode code point.
                if (--$mState === 0) {
                    // Check for illegal sequences and code points.
                    //
                    // From Unicode 3.1, non-shortest form is illegal
                    if (
                        ($mBytes === 2 && $mUcs4 < 0x0080)
                        ||
                        ($mBytes === 3 && $mUcs4 < 0x0800)
                        ||
                        ($mBytes === 4 && $mUcs4 < 0x10000)
                        ||
                        ($mBytes > 4)
                        ||
                        // From Unicode 3.2, surrogate characters are illegal.
                        (($mUcs4 & 0xFFFFF800) === 0xD800)
                        ||
                        // Code points outside the Unicode range are illegal.
                        ($mUcs4 > 0x10FFFF)
                    ) {
                        return false;
                    }

                    // initialize UTF8 cache
                    $mState = 0;
                    $mUcs4 = 0;
                    $mBytes = 1;
                }
            } else {
                // ((0xC0 & (*in) != 0x80) && (mState != 0))
                // Incomplete multi-octet sequence.
                return false;
            }
        }

        // A non-zero state here means the string ended in the middle of a
        // multi-octet sequence (e.g. a lone "\xC3"), which is not valid UTF-8.
        return $mState === 0;
    }
12124
12125
    /**
     * Applies the case-folding replacement tables to a string.
     *
     * The "common" table (self::$COMMON_CASE_FOLD) is always applied. The
     * "full" table is loaded once via getData('caseFolding_full') and applied
     * in addition when $fullCaseFold is truthy.
     *
     * @param string $str
     * @param bool   $useLower     <p>When exactly true, the 'upper' entries are replaced by the 'lower'
     *                             entries; otherwise the 'lower' entries are replaced by the 'upper' ones.</p>
     * @param bool   $fullCaseFold <p>Convert not only common cases.</p>
     *
     * @return string
     */
    private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
    {
        $toLower = ($useLower === true);

        $commonUpper = self::$COMMON_CASE_FOLD['upper'];
        $commonLower = self::$COMMON_CASE_FOLD['lower'];

        $str = $toLower
            ? \str_replace($commonUpper, $commonLower, $str)
            : \str_replace($commonLower, $commonUpper, $str);

        if (!$fullCaseFold) {
            return $str;
        }

        // The full folding table is only read from disk on first use.
        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        $search = $toLower ? $FULL_CASE_FOLD[0] : $FULL_CASE_FOLD[1];
        $replace = $toLower ? $FULL_CASE_FOLD[1] : $FULL_CASE_FOLD[0];

        return \str_replace($search, $replace, $str);
    }
12166
12167
    /**
     * Loads a data table by including "<this directory>/data/<$file>.php"
     * and returning whatever that file returns.
     *
     * @param string $file <p>File name without directory and without the ".php" extension.</p>
     *
     * @return array
     */
    private static function getData(string $file): array
    {
        $dataFile = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $dataFile;
    }
12181
12182
    /**
     * Builds the emoji lookup caches on first use.
     *
     * @return true|null true when the caches were built by this call, null when they already existed
     */
    private static function initEmojiData()
    {
        // Already initialised: nothing to do.
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        // Order the table by key length, longest keys first.
        \uksort(
            self::$EMOJI,
            static function (string $left, string $right): int {
                return \strlen($right) <=> \strlen($left);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = \array_values(self::$EMOJI);

        // One placeholder token per key, derived from the key's CRC32 checksum.
        foreach (self::$EMOJI_KEYS_CACHE as $emojiKey) {
            $checksum = \crc32($emojiKey);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev((string) $checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
12212
12213
    /**
     * Checks whether mbstring function overloading of the string functions
     * is active on the server.
     *
     * @return bool
     */
    private static function mbstring_overloaded(): bool
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */

        // Without the constant there is no overloading to detect.
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $funcOverload = (int) @\ini_get('mbstring.func_overload');

        return ($funcOverload & \MB_OVERLOAD_STRING) !== 0;
    }
12230
12231
    /**
     * Filters a list of strings and returns the survivors as a re-indexed list.
     *
     * @param array $strings           <p>The strings to filter.</p>
     * @param bool  $removeEmptyValues <p>When true, drop entries that are empty after trim().</p>
     * @param int   $removeShortValues <p>When not null, drop entries whose mb_strlen() is less than
     *                                 or equal to this value.</p>
     *
     * @return array
     */
    private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
    {
        $kept = [];
        $checkLength = $removeShortValues !== null;

        foreach ($strings as $candidate) {
            $tooShort = $checkLength && \mb_strlen($candidate) <= $removeShortValues;
            if ($tooShort) {
                continue;
            }

            $blank = $removeEmptyValues === true && \trim($candidate) === '';
            if ($blank) {
                continue;
            }

            $kept[] = $candidate;
        }

        return $kept;
    }
12265
12266
    /**
     * Builds a regular-expression fragment that matches any of the characters in $s.
     *
     * The pieces returned by self::str_split($s) are collected into one
     * bracketed character class; a piece that is neither short (at most two
     * bytes) nor a single character becomes a separate alternative, in which
     * case the result is a non-capturing group "(?:...|...)". Results are
     * memoised per "$s . $class".
     *
     * @param string $s     <p>The characters to match.</p>
     * @param string $class <p>Raw content that is placed first inside the character class.</p>
     *
     * @return string
     */
    private static function rxClass(string $s, string $class = ''): string
    {
        static $RX_CLASS_CACHE = [];

        $cacheKey = $s . $class;

        if (isset($RX_CLASS_CACHE[$cacheKey])) {
            return $RX_CLASS_CACHE[$cacheKey];
        }

        $classArray = [$class];

        // Iterate by value with a dedicated variable: the split result is a
        // temporary (nothing to reference) and $s must not be overwritten.
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // A literal hyphen has to be the first item of the class.
                $classArray[0] = '-' . $classArray[0];
            } elseif (!isset($char[2])) {
                // At most two bytes long: escape for use inside "/.../".
                $classArray[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                // One character made of three or more bytes.
                $classArray[0] .= $char;
            } else {
                $classArray[] = $char;
            }
        }

        // Compare against '' explicitly: a truthiness test would skip the
        // brackets for the class "0".
        if ($classArray[0] !== '') {
            $classArray[0] = '[' . $classArray[0] . ']';
        }

        if (\count($classArray) === 1) {
            $return = $classArray[0];
        } else {
            $return = '(?:' . \implode('|', $classArray) . ')';
        }

        $RX_CLASS_CACHE[$cacheKey] = $return;

        return $return;
    }
12314
12315
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * Splits $names on $delimiter, upper-cases the first character of every
     * part via self::ucfirst() and joins the parts again. A part is left
     * untouched when it is one of the listed particles (e.g. "von", "de"),
     * when it starts with one of the listed prefixes (e.g. "mc", "d'"), or,
     * for the "-" delimiter only, when it starts with one of the particles.
     *
     * @param string $names     <p>The name(s) to process.</p>
     * @param string $delimiter <p>Separator between the name parts; an empty delimiter yields ''.</p>
     * @param string $encoding  <p>Encoding passed to self::strpos() for the prefix checks.</p>
     *
     * @return string
     */
    private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
    {
        // explode() cannot split on an empty delimiter (warning + false on
        // PHP 7, ValueError on PHP 8), so return the documented fallback here.
        if ($delimiter === '') {
            return '';
        }

        $namesArray = \explode($delimiter, $names);

        $specialCases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        foreach ($namesArray as &$name) {
            if (\in_array($name, $specialCases['names'], true)) {
                continue;
            }

            $continue = false;

            if ($delimiter === '-') {
                foreach ($specialCases['names'] as $beginning) {
                    if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                        $continue = true;

                        break;
                    }
                }
            }

            if ($continue === false) {
                foreach ($specialCases['prefixes'] as $beginning) {
                    if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                        $continue = true;

                        break;
                    }
                }
            }

            if ($continue === true) {
                continue;
            }

            $name = self::ucfirst($name);
        }
        // Drop the reference so $name cannot alias the last array element.
        unset($name);

        return \implode($delimiter, $namesArray);
    }
12405
12406
    /**
     * Generic case sensitive transformation for collation matching.
     *
     * Normalizes the string to NFD and then removes every run of
     * non-spacing marks (\p{Mn}), i.e. the combining accents.
     *
     * @param string $str <p>The input string</p>
     *
     * @return string|null an empty string when the input cannot be normalized,
     *                     null when preg_replace() fails
     */
    private static function strtonatfold(string $str)
    {
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        // normalize() returns false on failure; handle it here
        // instead of handing a boolean to preg_replace().
        if ($decomposed === false) {
            return '';
        }

        return \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
12421
12422
    /**
     * Converts one input character to UTF-8.
     *
     * Characters whose ordinal is listed in the Windows-1252 table are
     * replaced by the table entry; every other character is encoded as a
     * two-byte sequence built from its ordinal.
     *
     * @param int|string $input <p>Used as key into self::$ORD.</p>
     *
     * @return string
     */
    private static function to_utf8_convert_helper($input): string
    {
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];

        // found in Windows-1252 special cases
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) {
            return (string) self::$WIN1252_TO_UTF8[$ordC1];
        }

        // The "|" and "&" below are intentional byte-wise operations on
        // strings, not boolean operators.
        // ">> 6" is the integer form of "/ 64"; a float must not be used as
        // an array offset (implicit float-to-int conversion is deprecated).
        // NOTE(review): assumes self::$CHR maps an integer to its one-byte string — confirm.
        /** @noinspection OffsetOperationsInspection */
        $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
        $cc2 = ((string) $input & "\x3F") | "\x80";

        return $cc1 . $cc2;
    }
12456
12457
    /**
     * Rewrites "%uXXXX" escapes (three or four hexadecimal digits) into
     * hexadecimal HTML entities of the form "&#xXXXX;".
     *
     * @param string $str
     *
     * @return string the input unchanged when it contains no such escape
     */
    private static function urldecode_unicode_helper(string $str): string
    {
        $unicodeEscape = '/%u([0-9a-fA-F]{3,4})/';

        // Nothing to rewrite: hand the input back untouched.
        if (\preg_match($unicodeEscape, $str) !== 1) {
            return $str;
        }

        return (string) \preg_replace($unicodeEscape, '&#x\\1;', $str);
    }
12471
}
12472