Passed
Push — master ( c8f5a7...7a8579 )
by Lars
03:43
created

UTF8   F

Complexity

Total Complexity 1688

Size/Duplication

Total Lines 12682
Duplicated Lines 0 %

Test Coverage

Coverage 80.01%

Importance

Changes 90
Bugs 49 Features 4
Metric Value
eloc 4311
dl 0
loc 12682
ccs 3045
cts 3806
cp 0.8001
rs 0.8
c 90
b 49
f 4
wmc 1688

299 Methods

Rating   Name   Duplication   Size   Complexity  
A add_bom_to_string() 0 7 2
A __construct() 0 2 1
A access() 0 11 4
A mbstring_overloaded() 0 11 2
A chr_to_decimal() 0 30 6
A file_has_bom() 0 8 2
A filter_input() 0 13 2
A array_change_key_case() 0 23 5
A get_unique_string() 0 15 2
A encode_mimeheader() 0 25 5
A count_chars() 0 11 1
A ctype_loaded() 0 3 1
A has_uppercase() 0 8 2
A emoji_decode() 0 18 2
D chr() 0 101 18
B get_file_type() 0 59 7
A chr_to_int() 0 3 1
C filter() 0 59 13
A decode_mimeheader() 0 15 5
A html_decode() 0 6 1
A chunk_split() 0 3 1
A emoji_encode() 0 18 2
B get_random_string() 0 56 10
A fix_utf8() 0 30 4
A first_char() 0 14 4
A css_stripe_media_queries() 0 6 1
A clean() 0 47 6
D getCharDirection() 0 105 118
A filter_var_array() 0 12 2
A decimal_to_chr() 0 3 1
B between() 0 48 8
A codepoints() 0 29 4
A chr_map() 0 5 1
A cleanup() 0 25 2
A char_at() 0 7 2
A chars() 0 3 1
A finfo_loaded() 0 3 1
A fits_inside() 0 3 1
A chr_size_list() 0 17 3
F extract_text() 0 175 34
A hasBom() 0 3 1
A filter_var() 0 12 2
F encode() 0 140 37
A fix_simple_utf8() 0 19 4
A checkForSupport() 0 47 4
A has_lowercase() 0 8 2
A hex_to_int() 0 14 3
A hex_to_chr() 0 3 1
A filter_input_array() 0 12 2
A getSupportInfo() 0 13 3
A chr_to_hex() 0 11 3
A collapse_whitespace() 0 8 2
B file_get_contents() 0 56 11
A callback() 0 3 1
A binary_to_str() 0 12 3
A bom() 0 3 1
A str_substr_after_first_separator() 0 28 6
A str_begins() 0 3 1
A max() 0 14 3
B str_camelize() 0 70 10
A parse_str() 0 16 4
A str_contains() 0 10 2
B str_to_lines() 0 29 8
A substr_in_byte() 0 18 6
A is_bom() 0 10 3
A is_hexadecimal() 0 8 2
A strnatcasecmp() 0 5 1
A substr_left() 0 15 4
D strlen() 0 99 19
A html_entity_decode_helper() 0 18 3
A str_isubstr_last() 0 25 4
A str_replace_beginning() 0 24 6
A remove_left() 0 24 4
B stripos() 0 59 11
A str_offset_exists() 0 10 2
D strrchr() 0 101 20
A to_filename() 0 9 1
A str_iends_with() 0 11 3
A max_chr_width() 0 8 2
A isBinary() 0 3 1
C utf8_decode() 0 60 13
A ltrim() 0 19 4
A is_utf8() 0 13 4
A remove_html() 0 3 1
B str_longest_common_suffix() 0 54 10
C wordwrap() 0 68 14
B ucfirst() 0 57 7
A lcword() 0 8 1
A str_pad_both() 0 12 1
A str_index_last() 0 11 1
A str_substr_last() 0 33 6
A mbstring_loaded() 0 3 1
A str_limit() 0 26 6
A html_escape() 0 6 1
A toUTF8() 0 3 1
A string() 0 10 1
C normalize_encoding() 0 134 14
B rxClass() 0 39 8
A str_ensure_right() 0 13 4
B str_titleize_for_humans() 0 155 5
C is_utf16() 0 65 16
A isHtml() 0 3 1
A normalize_whitespace() 0 9 1
A str_starts_with() 0 11 3
A isBase64() 0 3 1
A str_humanize() 0 15 1
A is_html() 0 14 2
C substr_count_in_byte() 0 54 15
A strchr() 0 13 1
A strichr() 0 13 1
A isUtf32() 0 3 1
A str_index_first() 0 11 1
A strlen_in_byte() 0 12 3
A str_ireplace_ending() 0 21 6
A rtrim() 0 19 4
C str_longest_common_substring() 0 76 16
A regex_replace() 0 20 3
A titlecase() 0 31 5
A getData() 0 6 1
A str_iindex_first() 0 11 1
B strtolower() 0 54 10
B urldecode() 0 37 8
A str_isubstr_before_first_separator() 0 19 5
B strrev() 0 43 10
A replace_all() 0 11 2
D substr_replace() 0 124 27
A removeBOM() 0 3 1
A strstr_in_byte() 0 15 4
A str_matches_pattern() 0 3 1
A is_alpha() 0 8 2
C str_titleize() 0 69 12
A ws() 0 3 1
A str_replace_first() 0 20 2
A toLatin1() 0 3 1
A str_pad_right() 0 12 1
B ucwords() 0 48 9
A to_boolean() 0 35 5
C stristr() 0 68 15
A isUtf8() 0 3 1
A strncasecmp() 0 10 1
B strwidth() 0 43 8
A str_iends() 0 3 1
A trim() 0 19 4
A is_serialized() 0 11 3
A str_upper_camelize() 0 8 1
A is_uppercase() 0 8 2
A substr_compare() 0 33 6
C substr_count() 0 62 16
A strnatcmp() 0 9 2
D str_pad() 0 146 16
A urldecode_unicode_helper() 0 8 2
A str_ireplace() 0 18 3
A to_latin1() 0 3 1
A str_replace_ending() 0 24 6
A string_has_bom() 0 10 3
B strtr() 0 34 8
A str_contains_all() 0 23 6
A is_ascii() 0 3 1
A normalize_line_ending() 0 3 1
A str_isubstr_after_last_separator() 0 26 5
D range() 0 65 23
B strspn() 0 30 10
A strcasecmp() 0 21 1
A str_transliterate() 0 6 1
B rawurldecode() 0 37 8
A str_ends() 0 3 1
B str_capitalize_name_helper() 0 79 10
A utf8_encode() 0 16 3
A normalize_msword() 0 3 1
C str_detect_encoding() 0 111 13
A spaces_to_tabs() 0 11 3
A str_istarts_with() 0 11 3
A is_blank() 0 8 2
A str_replace() 0 14 1
A substr_iright() 0 15 4
A htmlspecialchars() 0 15 3
A replace() 0 11 2
A to_iso8859() 0 16 4
A words_limit() 0 20 5
A strip_tags() 0 18 4
A pcre_utf8_support() 0 4 1
A str_isubstr_before_last_separator() 0 24 6
D str_truncate_safe() 0 78 18
A substr_right() 0 31 6
A lowerCaseFirst() 0 8 1
D str_split() 0 125 28
A str_ends_with_any() 0 13 4
A strrpos_in_byte() 0 12 4
A remove_right() 0 25 4
F strrpos() 0 118 25
A remove_html_breaks() 0 3 1
A showSupport() 0 8 2
A remove_invisible_characters() 0 9 1
A single_chr_html_encode() 0 18 4
A str_replace_last() 0 19 2
A str_iindex_last() 0 11 1
A str_substr_before_last_separator() 0 31 6
B is_binary() 0 35 9
A intlChar_loaded() 0 3 1
B strtocasefold() 0 33 7
A lcfirst() 0 44 5
A tabs_to_spaces() 0 11 3
B str_truncate() 0 44 7
D strripos() 0 96 19
A strpos_in_byte() 0 12 4
A str_ends_with() 0 11 3
A to_ascii() 0 6 1
A is_binary_file() 0 16 3
A intl_loaded() 0 3 1
A reduce_string_array() 0 29 6
B str_longest_common_prefix() 0 51 8
A str_pad_left() 0 12 1
A str_substr_first() 0 33 6
A html_stripe_empty_tags() 0 6 1
A remove_bom() 0 21 5
A str_repeat() 0 5 1
A strpbrk() 0 11 4
A whitespace_table() 0 3 1
A substr_count_simple() 0 31 6
A str_iends_with_any() 0 13 4
A str_substr_after_last_separator() 0 28 6
A str_isubstr_after_first_separator() 0 26 5
A json_loaded() 0 3 1
A isBom() 0 3 1
B str_snakeize() 0 55 6
A int_to_chr() 0 3 1
A is_lowercase() 0 8 2
A str_sort() 0 15 3
D to_utf8() 0 117 35
A ucword() 0 6 1
A str_underscored() 0 3 1
A str_offset_get() 0 14 4
A strip_whitespace() 0 7 2
A toAscii() 0 6 1
A str_ibegins() 0 3 1
A str_capitalize_name() 0 8 1
B str_limit_after_word() 0 55 11
A iconv_loaded() 0 3 1
A lcwords() 0 31 6
A str_upper_first() 0 13 1
A isAscii() 0 3 1
A normalizeEncoding() 0 3 1
A swapCase() 0 17 4
A substr_ileft() 0 15 4
A is_empty() 0 3 1
B html_encode() 0 53 11
A str_dasherize() 0 3 1
A isUtf16() 0 3 1
A str_ensure_left() 0 11 3
B urldecode_fix_win1252_chars() 0 227 1
A toIso8859() 0 3 1
C is_utf32() 0 65 16
C ord() 0 72 16
A is_alphanumeric() 0 8 2
A strtonatfold() 0 7 1
A json_decode() 0 14 2
C strcspn() 0 52 12
B is_json() 0 29 8
A fixStrCaseHelper() 0 36 5
A int_to_hex() 0 7 2
B str_split_pattern() 0 49 11
D strstr() 0 92 18
A json_encode() 0 10 2
A str_isubstr_first() 0 25 4
A is_base64() 0 20 5
A str_last_char() 0 16 4
A str_ireplace_beginning() 0 21 6
A htmlentities() 0 28 3
A str_substr_before_first_separator() 0 32 6
F substr() 0 143 32
A isJson() 0 3 1
A wordwrap_per_line() 0 28 5
A str_surround() 0 3 1
A strncmp() 0 19 4
A str_insert() 0 28 4
A utf8_fix_win1252_chars() 0 3 1
A replace_diamond_question_mark() 0 38 5
D is_utf8_string() 0 134 28
A to_utf8_convert_helper() 0 28 5
B str_delimit() 0 33 8
B strtoupper() 0 54 10
A min() 0 14 3
C html_entity_decode() 0 77 17
A str_starts_with_any() 0 17 5
B strrichr() 0 54 11
A split() 0 6 1
A str_istarts_with_any() 0 17 5
B str_contains_any() 0 29 8
A initEmojiData() 0 26 4
A remove_duplicates() 0 14 4
B str_slice() 0 33 10
F strpos() 0 131 27
A str_shuffle() 0 35 6
A strcmp() 0 9 2
A str_word_count() 0 23 5
A strripos_in_byte() 0 12 4
A str_to_binary() 0 9 2
A symfony_polyfill_used() 0 16 5
B str_to_words() 0 36 8

How to fix   Complexity   

Complex Class

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
final class UTF8
8
{
9
    /**
10
     * (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
11
     * This regular expression is a work around for http://bugs.exim.org/1279
12
     */
13
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
14
15
    /**
16
     * Bom => Byte-Length
17
     *
18
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
19
     *
20
     * @var array
21
     */
22
    private static $BOM = [
        // Known byte order marks mapped to their length in bytes.
        // The `as "WINDOWS-1252"` variants are the mojibake you get when the
        // raw BOM bytes were read as Windows-1252 and re-encoded as UTF-8.
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        // "ï»¿" written as escapes, so that BOM-stripping tools cannot turn the key into an empty string
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
34
35
    /**
36
     * Numeric code point => UTF-8 Character
37
     *
38
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
39
     *
40
     * @var array
41
     */
42
    private static $WHITESPACE = [
43
        // NUL Byte
44
        0 => "\x0",
45
        // Tab
46
        9 => "\x9",
47
        // New Line
48
        10 => "\xa",
49
        // Vertical Tab
50
        11 => "\xb",
51
        // Carriage Return
52
        13 => "\xd",
53
        // Ordinary Space
54
        32 => "\x20",
55
        // NO-BREAK SPACE
56
        160 => "\xc2\xa0",
57
        // OGHAM SPACE MARK
58
        5760 => "\xe1\x9a\x80",
59
        // MONGOLIAN VOWEL SEPARATOR
60
        6158 => "\xe1\xa0\x8e",
61
        // EN QUAD
62
        8192 => "\xe2\x80\x80",
63
        // EM QUAD
64
        8193 => "\xe2\x80\x81",
65
        // EN SPACE
66
        8194 => "\xe2\x80\x82",
67
        // EM SPACE
68
        8195 => "\xe2\x80\x83",
69
        // THREE-PER-EM SPACE
70
        8196 => "\xe2\x80\x84",
71
        // FOUR-PER-EM SPACE
72
        8197 => "\xe2\x80\x85",
73
        // SIX-PER-EM SPACE
74
        8198 => "\xe2\x80\x86",
75
        // FIGURE SPACE
76
        8199 => "\xe2\x80\x87",
77
        // PUNCTUATION SPACE
78
        8200 => "\xe2\x80\x88",
79
        // THIN SPACE
80
        8201 => "\xe2\x80\x89",
81
        //HAIR SPACE
82
        8202 => "\xe2\x80\x8a",
83
        // LINE SEPARATOR
84
        8232 => "\xe2\x80\xa8",
85
        // PARAGRAPH SEPARATOR
86
        8233 => "\xe2\x80\xa9",
87
        // NARROW NO-BREAK SPACE
88
        8239 => "\xe2\x80\xaf",
89
        // MEDIUM MATHEMATICAL SPACE
90
        8287 => "\xe2\x81\x9f",
91
        // IDEOGRAPHIC SPACE
92
        12288 => "\xe3\x80\x80",
93
    ];
94
95
    /**
96
     * @var array
97
     */
98
    private static $WHITESPACE_TABLE = [
99
        'SPACE'                     => "\x20",
100
        'NO-BREAK SPACE'            => "\xc2\xa0",
101
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
102
        'EN QUAD'                   => "\xe2\x80\x80",
103
        'EM QUAD'                   => "\xe2\x80\x81",
104
        'EN SPACE'                  => "\xe2\x80\x82",
105
        'EM SPACE'                  => "\xe2\x80\x83",
106
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
107
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
108
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
109
        'FIGURE SPACE'              => "\xe2\x80\x87",
110
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
111
        'THIN SPACE'                => "\xe2\x80\x89",
112
        'HAIR SPACE'                => "\xe2\x80\x8a",
113
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
114
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
115
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
116
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
117
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
118
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
119
        'HALFWIDTH HANGUL FILLER'   => "\xef\xbe\xa0",
120
    ];
121
122
    /**
123
     * @var array{upper: string[], lower: string[]}
0 ignored issues
show
Documentation Bug introduced by
The doc comment array{upper at position 0 could not be parsed: Unknown type name 'array{upper' at position 0 in array{upper.
Loading history...
124
     */
125
    private static $COMMON_CASE_FOLD = [
126
        'upper' => [
127
            'µ',
128
            'ſ',
129
            "\xCD\x85",
130
            'ς',
131
            'ẞ',
132
            "\xCF\x90",
133
            "\xCF\x91",
134
            "\xCF\x95",
135
            "\xCF\x96",
136
            "\xCF\xB0",
137
            "\xCF\xB1",
138
            "\xCF\xB5",
139
            "\xE1\xBA\x9B",
140
            "\xE1\xBE\xBE",
141
        ],
142
        'lower' => [
143
            'μ',
144
            's',
145
            'ι',
146
            'σ',
147
            'ß',
148
            'β',
149
            'θ',
150
            'φ',
151
            'π',
152
            'κ',
153
            'ρ',
154
            'ε',
155
            "\xE1\xB9\xA1",
156
            'ι',
157
        ],
158
    ];
159
160
    /**
161
     * @var array
162
     */
163
    private static $SUPPORT = [];
164
165
    /**
166
     * @var array|null
167
     */
168
    private static $BROKEN_UTF8_FIX;
169
170
    /**
171
     * @var array|null
172
     */
173
    private static $WIN1252_TO_UTF8;
174
175
    /**
176
     * @var array|null
177
     */
178
    private static $INTL_TRANSLITERATOR_LIST;
179
180
    /**
181
     * @var array|null
182
     */
183
    private static $ENCODINGS;
184
185
    /**
186
     * @var array|null
187
     */
188
    private static $ORD;
189
190
    /**
191
     * @var array|null
192
     */
193
    private static $EMOJI;
194
195
    /**
196
     * @var array|null
197
     */
198
    private static $EMOJI_VALUES_CACHE;
199
200
    /**
201
     * @var array|null
202
     */
203
    private static $EMOJI_KEYS_CACHE;
204
205
    /**
206
     * @var array|null
207
     */
208
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
209
210
    /**
211
     * @var array|null
212
     */
213
    private static $CHR;
214
215
    /**
216
     * __construct()
217
     */
218 33
    public function __construct()
    {
        // Intentionally empty: the constructor sets up no instance state.
    }
221
222
    /**
223
     * Return the character at the specified position: $str[1] like functionality.
224
     *
225
     * @param string $str      <p>A UTF-8 string.</p>
226
     * @param int    $pos      <p>The position of character to return.</p>
227
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
228
     *
229
     * @return string single multi-byte character
230
     */
231 3
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        // An empty input or a negative position always yields an empty string.
        if ($pos < 0 || $str === '') {
            return '';
        }

        // UTF-8 goes straight to mbstring; any other charset is delegated to self::substr().
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $pos, 1)
            : self::substr($str, $pos, 1, $encoding);

        return (string) $char;
    }
243
244
    /**
245
     * Prepends UTF-8 BOM character to the string and returns the whole string.
246
     *
247
     * INFO: If BOM already existed there, the Input string is returned.
248
     *
249
     * @param string $str <p>The input string.</p>
250
     *
251
     * @return string the output string that contains BOM
252
     */
253 2
    public static function add_bom_to_string(string $str): string
    {
        // Only prepend the UTF-8 BOM when string_has_bom() reports that none is present.
        return self::string_has_bom($str) === false
            ? self::bom() . $str
            : $str;
    }
261
262
    /**
263
     * Changes all keys in an array.
264
     *
265
     * @param array  $array    <p>The array to work on</p>
266
     * @param int    $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
267
     *                         or <strong>CASE_LOWER</strong> (default)</p>
268
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
269
     *
270
     * @return array
271
     *                  <p>An array with its keys lower- or uppercased.</p>
272
     */
273 2
    public static function array_change_key_case(
        array $array,
        int $case = \CASE_LOWER,
        string $encoding = 'UTF-8'
    ): array {
        // Any value other than CASE_UPPER / CASE_LOWER falls back to lower-casing.
        if (
            $case !== \CASE_LOWER
            &&
            $case !== \CASE_UPPER
        ) {
            $case = \CASE_LOWER;
        }

        $return = [];
        // Iterate by value: the values are only copied into the result, so a
        // by-reference loop would just force a copy of the input array and
        // leave a dangling reference behind.
        foreach ($array as $key => $value) {
            $key = $case === \CASE_LOWER
                ? self::strtolower((string) $key, $encoding)
                : self::strtoupper((string) $key, $encoding);

            // Keys that collide after case conversion are overwritten: the last one wins.
            $return[$key] = $value;
        }

        return $return;
    }
297
298
    /**
299
     * Returns the substring between $start and $end, if found, or an empty
300
     * string. An optional offset may be supplied from which to begin the
301
     * search for the start string.
302
     *
303
     * @param string $str
304
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
305
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
306
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
307
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
308
     *
309
     * @return string
310
     */
311 16
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            // Generic path: normalize the charset name and use the class's own helpers.
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $start_at = self::strpos($str, $start, $offset, $encoding);
            if ($start_at === false) {
                return '';
            }

            // The wanted content begins right after the start delimiter.
            $content_at = $start_at + (int) self::strlen($start, $encoding);
            $end_at = self::strpos($str, $end, $content_at, $encoding);
            if ($end_at === false || $end_at === $content_at) {
                return '';
            }

            return (string) self::substr($str, $content_at, $end_at - $content_at, $encoding);
        }

        // UTF-8 fast path: call mbstring directly.
        $start_at = \mb_strpos($str, $start, $offset);
        if ($start_at === false) {
            return '';
        }

        // The wanted content begins right after the start delimiter.
        $content_at = $start_at + (int) \mb_strlen($start);
        $end_at = \mb_strpos($str, $end, $content_at);
        if ($end_at === false || $end_at === $content_at) {
            // No end delimiter, or nothing between the two delimiters.
            return '';
        }

        return (string) \mb_substr($str, $content_at, $end_at - $content_at);
    }
361
362
    /**
363
     * Convert binary into a string.
364
     *
365
     * @param mixed $bin 1|0
366
     *
367
     * @return string
368
     */
369 2
    public static function binary_to_str($bin): string
    {
        // Anything that is not a non-empty string cannot hold binary digits.
        if (!\is_string($bin) || $bin === '') {
            return '';
        }

        // Keep only binary digits and drop leading zeros, mirroring how
        // base_convert() treats its input; an all-zero input yields ''.
        $bits = \ltrim((string) \preg_replace('/[^01]/', '', $bin), '0');
        if ($bits === '') {
            return '';
        }

        // Convert nibble by nibble instead of using base_convert(), which goes
        // through a float and silently loses precision on long inputs.
        $bits = \str_repeat('0', (4 - \strlen($bits) % 4) % 4) . $bits;
        $hex = '';
        foreach (\str_split($bits, 4) as $nibble) {
            $hex .= \dechex((int) \bindec($nibble));
        }

        // pack('H*') pads an odd number of hex digits on the right, which would
        // shift the value by one nibble, so pad on the left here.
        if (\strlen($hex) % 2 !== 0) {
            $hex = '0' . $hex;
        }

        return \pack('H*', $hex);
    }
382
383
    /**
384
     * Returns the UTF-8 Byte Order Mark Character.
385
     *
386
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
387
     *
388
     * @return string UTF-8 Byte Order Mark
389
     */
390 4
    public static function bom(): string
    {
        // The three bytes EF BB BF form the UTF-8 byte order mark.
        $utf8_bom = "\xef\xbb\xbf";

        return $utf8_bom;
    }
394
395
    /**
396
     * @alias of UTF8::chr_map()
397
     *
398
     * @param array|string $callback
399
     * @param string       $str
400
     *
401
     * @return string[]
402
     *
403
     * @see UTF8::chr_map()
404
     */
405 2
    public static function callback($callback, string $str): array
    {
        // Pure alias: all of the work happens in chr_map().
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
409
410
    /**
411
     * Returns the character at $index, with indexes starting at 0.
412
     *
413
     * @param string $str      <p>The input string.</p>
414
     * @param int    $index    <p>Position of the character.</p>
415
     * @param string $encoding [optional] <p>Default is UTF-8</p>
416
     *
417
     * @return string the character at $index
418
     */
419 9
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // UTF-8 goes straight to mbstring; any other charset is delegated to self::substr().
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $index, 1)
            : self::substr($str, $index, 1, $encoding);

        return (string) $char;
    }
427
428
    /**
429
     * Returns an array consisting of the characters in the string.
430
     *
431
     * @param string $str <p>The input string.</p>
432
     *
433
     * @return string[] an array of chars
434
     */
435 3
    public static function chars(string $str): array
    {
        // Delegates to str_split() without passing a chunk length.
        $characters = self::str_split($str);

        return $characters;
    }
439
440
    /**
441
     * This method will auto-detect your server environment for UTF-8 support.
442
     *
443
     * @return true|null
444
     *
445
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
446
     */
447 5
    public static function checkForSupport()
    {
        // The detection runs once; every later call is a no-op that returns null.
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            // Switch mbstring (functions and regex) to UTF-8 and remember that we did.
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        // When symfony_polyfill_used() reports true, set the internal encoding again.
        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
495
496
    /**
497
     * Generates a UTF-8 encoded character from the given code point.
498
     *
499
     * INFO: opposite to UTF8::ord()
500
     *
501
     * @param int|string $code_point <p>The code point for which to generate a character.</p>
502
     * @param string     $encoding   [optional] <p>Default is UTF-8</p>
503
     *
504
     * @return string|null multi-byte character, returns null on failure or empty input
505
     */
506 25
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // Memo shared by all calls: "<code point><encoding>" => generated character.
        static $memo = [];

        // "UTF-8" and "CP850" are used as given; every other name is normalized first.
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Any target charset other than these three triggers a warning when mbstring is missing.
        $no_mbstring_needed = $encoding === 'UTF-8'
                              || $encoding === 'ISO-8859-1'
                              || $encoding === 'WINDOWS-1252';
        if (!$no_mbstring_needed && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        $memo_key = $code_point . $encoding;
        if (isset($memo[$memo_key]) === true) {
            return $memo[$memo_key];
        }

        if ($code_point <= 127) {
            // use "simple"-char only until "\x80": plain lookup in the "chr" data table
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];
        } elseif (self::$SUPPORT['intlChar'] === true) {
            // fallback via "IntlChar"
            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);
        } else {
            // fallback via vanilla php: assemble the UTF-8 byte sequence by hand
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $table = self::$CHR;

            $code_point = (int) $code_point;
            if ($code_point <= 0x7F) {
                // 1 byte
                $chr = $table[$code_point];
            } elseif ($code_point <= 0x7FF) {
                // 2 bytes: 110xxxxx 10xxxxxx
                $chr = $table[($code_point >> 6) + 0xC0] .
                       $table[($code_point & 0x3F) + 0x80];
            } elseif ($code_point <= 0xFFFF) {
                // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
                $chr = $table[($code_point >> 12) + 0xE0] .
                       $table[(($code_point >> 6) & 0x3F) + 0x80] .
                       $table[($code_point & 0x3F) + 0x80];
            } else {
                // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                $chr = $table[($code_point >> 18) + 0xF0] .
                       $table[(($code_point >> 12) & 0x3F) + 0x80] .
                       $table[(($code_point >> 6) & 0x3F) + 0x80] .
                       $table[($code_point & 0x3F) + 0x80];
            }
        }

        // Characters are generated as UTF-8; convert when another target charset was requested.
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $memo[$memo_key] = $chr;
    }
608
609
    /**
610
     * Applies callback to all characters of a string.
611
     *
612
     * @param array|string $callback <p>The callback function.</p>
613
     * @param string       $str      <p>UTF-8 string to run callback on.</p>
614
     *
615
     * @return string[] the outcome of callback
616
     */
617 2
    public static function chr_map($callback, string $str): array
    {
        // Split the string with str_split(), then run the callback over every piece.
        $characters = self::str_split($str);

        return \array_map($callback, $characters);
    }
624
625
    /**
626
     * Generates an array of byte length of each character of a Unicode string.
627
     *
628
     * 1 byte => U+0000  - U+007F
629
     * 2 byte => U+0080  - U+07FF
630
     * 3 byte => U+0800  - U+FFFF
631
     * 4 byte => U+10000 - U+10FFFF
632
     *
633
     * @param string $str <p>The original unicode string.</p>
634
     *
635
     * @return int[] an array of byte lengths of each character
636
     */
637 4
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        // Pick the byte counter first, then apply it to every character in one place.
        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            $byte_counter = static function (string $data): int {
                // "mb_" is available if overload is used, so use it ...
                return \mb_strlen($data, 'CP850'); // 8-BIT
            };
        } else {
            $byte_counter = '\strlen';
        }

        return \array_map($byte_counter, self::str_split($str));
    }
655
656
    /**
657
     * Get a decimal code representation of a specific character.
658
     *
659
     * @param string $char <p>The input character.</p>
660
     *
661
     * @return int
662
     */
663 4
    public static function chr_to_decimal(string $char): int
    {
        $code = self::ord($char[0]);

        // 0xxxxxxx: high bit not set, the first byte already is the result
        if (($code & 0x80) === 0) {
            return $code;
        }

        // Read the sequence length from the lead byte and clear the mask bits
        // of that lead-byte form. Any other lead byte is left as it is.
        $length = 1;
        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $length = 2;
            $code = $code & ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $length = 3;
            $code = $code & ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $length = 4;
            $code = $code & ~0xf0;
        }

        // 10xxxxxx: shift in the bits of each following byte (its high bit is masked off)
        for ($offset = 1; $offset < $length; ++$offset) {
            $code = ($code << 6) + (self::ord($char[$offset]) & ~0x80);
        }

        return $code;
    }
694
695
    /**
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * @param int|string $char   <p>The input character</p>
     * @param string     $prefix [optional] <p>Prefix handed on to int_to_hex().</p>
     *
     * @return string The code point encoded as U+xxxx, or an empty string for an empty input
     */
    public static function chr_to_hex($char, string $prefix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        // the NUL entity is looked up as an empty string
        $lookup = $char === '&#0;' ? '' : (string) $char;

        $code_point = self::ord($lookup);

        return self::int_to_hex($code_point, $prefix);
    }
715
716
    /**
     * Alias of "UTF8::chr_to_decimal()".
     *
     * @param string $chr <p>The input character.</p>
     *
     * @return int
     *
     * @see UTF8::chr_to_decimal()
     */
    public static function chr_to_int(string $chr): int
    {
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
729
730
    /**
     * Splits a string into smaller chunks and joins them with the specified line ending.
     *
     * The chunks come from UTF8::str_split() and are glued together with $end.
     *
     * @param string $body         <p>The original string to be split.</p>
     * @param int    $chunk_length [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end          [optional] <p>The character(s) to be inserted between the chunks.</p>
     *
     * @return string the chunked string
     */
    public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunk_length);

        return \implode($end, $chunks);
    }
743
744
    /**
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
     *
     * The byte-level filter always runs; every other step is switched on or off
     * by its flag and is applied in the order the flags are handled below.
     *
     * @param string $str                           <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
     * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
     *                                              whitespace.</p>
     * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
     *                                              e.g.: "…"
     *                                              => "..."</p>
     * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
     *                                              combination with
     *                                              $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
     *                                              mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you not want to remove invisible
     *                                              characters e.g.: "\0"</p>
     *
     * @return string clean UTF-8 encoded string
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // group 1 captures well-formed sequences, groups 2 and 3 match stray bytes;
        // replacing every match with "$1" keeps the former and drops the latter
        $byte_filter = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        /** @noinspection NotOptimalRegularExpressionsInspection */
        $cleaned = (string) \preg_replace($byte_filter, '$1', $str);

        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned, '');
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        if ($remove_bom) {
            $cleaned = self::remove_bom($cleaned);
        }

        return $cleaned;
    }
812
813
    /**
     * Clean-up a string and show only printable UTF-8 chars at the end  + fix UTF-8 encoding.
     *
     * Runs UTF8::fix_simple_utf8() first and then UTF8::clean() with a fixed set of flags.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function cleanup($str): string
    {
        $input = (string) $str;

        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $fixed = self::fix_simple_utf8($input);

        // flags for clean(), in parameter order
        $remove_bom = true;
        $normalize_whitespace = true;
        $normalize_msword = false;
        $keep_non_breaking_space = true;
        $replace_diamond_question_mark = true;
        $remove_invisible_characters = true;

        return self::clean(
            $fixed,
            $remove_bom,
            $normalize_whitespace,
            $normalize_msword,
            $keep_non_breaking_space,
            $replace_diamond_question_mark,
            $remove_invisible_characters
        );
    }
847
848
    /**
     * Accepts a string or a array of strings and returns an array of Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * @param string|string[] $arg     <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $u_style <p>If True, will return code points in U+xxxx format,
     *                                 default, code points will be returned as integers.</p>
     *
     * @return array<int|string>
     *                           The array of code points:<br>
     *                           array<int> for $u_style === false<br>
     *                           array<string> for $u_style === true<br>
     */
    public static function codepoints($arg, bool $u_style = false): array
    {
        // a plain string is first broken up by str_split()
        if (\is_string($arg) === true) {
            $arg = self::str_split($arg);
        }

        $code_points = \array_map(
            static function ($chr) {
                return self::ord($chr);
            },
            $arg
        );

        if (\count($code_points) === 0) {
            return [];
        }

        if (!$u_style) {
            return $code_points;
        }

        // int_to_hex() is called with its default prefix
        return \array_map(
            static function ($code_point) {
                return self::int_to_hex($code_point);
            },
            $code_points
        );
    }
892
893
    /**
     * Replaces every run of characters matched by the POSIX class [[:space:]]
     * with a single space and trims the result.
     *
     * With mbstring the replacement is done by mb_ereg_replace(), otherwise by
     * UTF8::regex_replace().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string with a trimmed $str and condensed whitespace
     */
    public static function collapse_whitespace(string $str): string
    {
        $whitespace_run = '[[:space:]]+';

        if (self::$SUPPORT['mbstring'] !== true) {
            $collapsed = self::regex_replace($str, $whitespace_run, ' ');
        } else {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $collapsed = (string) \mb_ereg_replace($whitespace_run, ' ', $str);
        }

        return \trim($collapsed);
    }
911
912
    /**
     * Returns count of characters used in a string.
     *
     * The string is split by UTF8::str_split() into pieces of length 1 and the
     * pieces are tallied with array_count_values().
     *
     * @param string $str                     <p>The input string.</p>
     * @param bool   $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool   $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                        "mb_" functions; passed on to UTF8::str_split().</p>
     *
     * @return int[] an associative array of Character as keys and
     *               their count as values
     */
    public static function count_chars(
        string $str,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        $characters = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

        return \array_count_values($characters);
    }
936
937
    /**
     * Remove css media-queries.
     *
     * Every "@media ... { ... }" block matched by the pattern is replaced with an empty string.
     *
     * @param string $str <p>The CSS input.</p>
     *
     * @return string
     */
    public static function css_stripe_media_queries(string $str): string
    {
        $media_query_pattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $stripped = \preg_replace($media_query_pattern, '', $str);

        return (string) $stripped;
    }
952
953
    /**
     * Checks whether the "ctype" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function ctype_loaded(): bool
    {
        $is_loaded = \extension_loaded('ctype');

        return $is_loaded;
    }
963
964
    /**
     * Converts an int value into a UTF-8 character.
     *
     * The value is wrapped into a numeric HTML entity ("&#...;") which is then
     * decoded by UTF8::html_entity_decode().
     *
     * @param mixed $int
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        $numeric_entity = "&#{$int};";

        return self::html_entity_decode($numeric_entity, \ENT_QUOTES | \ENT_HTML5);
    }
975
976
    /**
     * Decodes a MIME header field
     *
     * Uses iconv_mime_decode() when iconv is flagged as available, otherwise
     * mb_decode_mimeheader().
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      A decoded MIME field on success,
     *                      or false if an error occurs during the decoding
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        // "UTF-8" and "CP850" are used as given, anything else is normalized first
        if (\in_array($encoding, ['UTF-8', 'CP850'], true) === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (self::$SUPPORT['iconv'] === true) {
            return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
        }

        // no iconv: re-encode the input for non-UTF-8 charsets before decoding
        $header = $encoding === 'UTF-8' ? $str : self::encode($encoding, $str);

        return \mb_decode_mimeheader($header);
    }
1002
1003
    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * @param string $str                            <p>The input string.</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               When <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode".</p>
     *
     * @return string
     */
    public static function emoji_decode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        self::initEmojiData();

        // pick the key list that matches the mapping used while encoding
        $encoded_keys = $use_reversible_string_mappings
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) $encoded_keys,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }
1033
1034
    /**
     * Encode a string with emoji chars into a non-emoji string.
     *
     * @param string $str                            <p>The input string</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               when <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode"</p>
     *
     * @return string
     */
    public static function emoji_encode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        self::initEmojiData();

        // pick the key list the emoji values are replaced with
        $replacement_keys = $use_reversible_string_mappings
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            (array) $replacement_keys,
            $str
        );
    }
1064
1065
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  This function will also try to fix broken / double encoding,
     *        so you can call this function also on a UTF-8 string and you don't mess up the string.
     *
     * Besides real charsets the pseudo encodings "JSON", "BASE64" and
     * "HTML-ENTITIES" are handled as source and as target.
     *
     * @param string $to_encoding                   <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                           <p>The input string</p>
     * @param bool   $auto_detect_the_from_encoding [optional] <p>When true, the source encoding is detected from
     *                                              the string and a detected encoding replaces $from_encoding;
     *                                              if nothing is detected the string is passed through
     *                                              UTF8::to_utf8().</p>
     * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                              A empty string will trigger the autodetect anyway.</p>
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $to_encoding,
        string $str,
        bool $auto_detect_the_from_encoding = true,
        string $from_encoding = ''
    ): string {
        if ($to_encoding === '' || $str === '') {
            return $str;
        }

        // these two names are used as given, everything else gets normalized
        $plain_names = ['UTF-8', 'CP850'];

        if (\in_array($to_encoding, $plain_names, true) === false) {
            $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
        }

        if ($from_encoding && \in_array($from_encoding, $plain_names, true) === false) {
            $from_encoding = self::normalize_encoding($from_encoding, null);
        }

        // explicit source equals target: nothing to convert
        if ($to_encoding && $from_encoding && $from_encoding === $to_encoding) {
            return $str;
        }

        // --- pseudo encodings: a target returns right away, a source is decoded
        // --- and the source encoding is reset so that it gets detected below

        if ($to_encoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }
        if ($from_encoding === 'JSON') {
            $str = self::json_decode($str);
            $from_encoding = '';
        }

        if ($to_encoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($from_encoding === 'BASE64') {
            $str = \base64_decode($str, true);
            $from_encoding = '';
        }

        if ($to_encoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true, 'UTF-8');
        }
        if ($from_encoding === 'HTML-ENTITIES') {
            $str = self::html_decode($str, \ENT_COMPAT, 'UTF-8');
            $from_encoding = '';
        }

        // detection runs in auto mode and whenever no source encoding is left
        $detected_encoding = ($auto_detect_the_from_encoding === true || !$from_encoding)
            ? self::str_detect_encoding($str)
            : false;

        // DEBUG
        //var_dump($to_encoding, $from_encoding, $detected_encoding, $str, "\n\n");

        if ($detected_encoding !== false) {
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $from_encoding = $detected_encoding;
        } elseif ($auto_detect_the_from_encoding === true) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (!$from_encoding || $from_encoding === $to_encoding) {
            return $str;
        }

        // dedicated converters for the common single-byte <-> UTF-8 cases
        if (
            $to_encoding === 'UTF-8'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'ISO-8859-1'], true)
        ) {
            return self::to_utf8($str);
        }

        if (
            $to_encoding === 'ISO-8859-1'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'UTF-8'], true)
        ) {
            return self::to_iso8859($str);
        }

        $has_mbstring = self::$SUPPORT['mbstring'];

        if (
            $has_mbstring === false
            &&
            \in_array($to_encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true) === false
        ) {
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
        }

        if ($has_mbstring === true) {
            // warning: do not use the symfony polyfill here
            $converted = \mb_convert_encoding($str, $to_encoding, $from_encoding);

            if ($converted) {
                return $converted;
            }
        }

        // last resort: iconv, and the unconverted input if that fails as well
        $iconv_result = \iconv($from_encoding, $to_encoding, $str);

        return $iconv_result !== false ? $iconv_result : $str;
    }
1224
1225
    /**
     * Encodes a string as a MIME header field via iconv_mime_encode().
     *
     * The field name handed to iconv_mime_encode() is an empty string.
     *
     * @param string $str
     * @param string $from_charset      [optional] <p>Set the input charset.</p>
     * @param string $to_charset        [optional] <p>Set the output charset.</p>
     * @param string $transfer_encoding [optional] <p>Set the transfer encoding (the "scheme" preference).</p>
     * @param string $linefeed          [optional] <p>Set the used linefeed (the "line-break-chars"
     *                                  preference), default: CR LF.</p>
     * @param int    $indent            [optional] <p>Set the max line length (the "line-length" preference).</p>
     *
     * @return false|string
     *                      <p>An encoded MIME field on success,
     *                      or false if an error occurs during the encoding.</p>
     */
    public static function encode_mimeheader(
        $str,
        $from_charset = 'UTF-8',
        $to_charset = 'UTF-8',
        $transfer_encoding = 'Q',
        // a real CR LF: the former single-quoted '\\r\\n' was the literal text "\r\n"
        $linefeed = "\r\n",
        $indent = 76
    ) {
        if ($from_charset !== 'UTF-8' && $from_charset !== 'CP850') {
            $from_charset = self::normalize_encoding($from_charset, 'UTF-8');
        }

        if ($to_charset !== 'UTF-8' && $to_charset !== 'CP850') {
            $to_charset = self::normalize_encoding($to_charset, 'UTF-8');
        }

        return \iconv_mime_encode(
            '',
            $str,
            [
                'scheme'           => $transfer_encoding,
                'line-length'      => $indent,
                'input-charset'    => $from_charset,
                'output-charset'   => $to_charset,
                'line-break-chars' => $linefeed,
            ]
        );
    }
1265
1266
    /**
     * Create an extract from a sentence, so if the search-string was found, it try to centered in the output.
     *
     * Cut positions are looked up at the next " " or "." and the skipped text is
     * marked with $replacer_for_skipped_text.
     *
     * @param string   $str                       <p>The input string.</p>
     * @param string   $search                    <p>The searched string.</p>
     * @param int|null $length                    [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
     * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacer_for_skipped_text = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // UTF-8 goes straight to the "mb_" functions, everything else through the class helpers
        $is_utf8 = $encoding === 'UTF-8';

        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2, 0);
        }

        // ---------------------------------------------------------------
        // no search term: cut the text at the first " " / "." after $length
        // ---------------------------------------------------------------
        if ($search === '') {
            if ($length > 0) {
                $string_length = $is_utf8
                    ? (int) \mb_strlen($str)
                    : (int) self::strlen($str, $encoding);
                $end = \min($length - 1, $string_length);
            } else {
                $end = 0;
            }

            $pos = $is_utf8
                ? (int) \min(
                    \mb_strpos($str, ' ', $end),
                    \mb_strpos($str, '.', $end)
                )
                : (int) \min(
                    self::strpos($str, ' ', $end, $encoding),
                    self::strpos($str, '.', $end, $encoding)
                );

            if (!$pos) {
                return $str;
            }

            $str_sub = $is_utf8
                ? \mb_substr($str, 0, $pos)
                : self::substr($str, 0, $pos, $encoding);

            if ($str_sub === false) {
                return '';
            }

            return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
        }

        // ---------------------------------------------------------------
        // search term given: locate it and the start of the window around it
        // ---------------------------------------------------------------
        if ($is_utf8) {
            $word_position = (int) \mb_stripos($str, $search);
            $half_side = (int) ($word_position - $length / 2 + (int) \mb_strlen($search) / 2);
        } else {
            $word_position = (int) self::stripos($str, $search, 0, $encoding);
            $half_side = (int) ($word_position - $length / 2 + (int) self::strlen($search, $encoding) / 2);
        }

        $pos_start = 0;
        if ($half_side > 0) {
            $half_text = $is_utf8
                ? \mb_substr($str, 0, $half_side)
                : self::substr($str, 0, $half_side, $encoding);

            if ($half_text !== false) {
                // last " " / "." in front of the window
                $pos_start = $is_utf8
                    ? (int) \max(
                        \mb_strrpos($half_text, ' '),
                        \mb_strrpos($half_text, '.')
                    )
                    : (int) \max(
                        self::strrpos($half_text, ' ', 0, $encoding),
                        self::strrpos($half_text, '.', 0, $encoding)
                    );
            }
        }

        if ($word_position && $half_side > 0) {
            // the window starts inside the text
            $real_length = (int) self::strlen($str, $encoding);
            $offset = \min($pos_start + $length - 1, $real_length);

            $pos_end = $is_utf8
                ? (int) \min(
                    \mb_strpos($str, ' ', $offset),
                    \mb_strpos($str, '.', $offset)
                ) - $pos_start
                : (int) \min(
                    self::strpos($str, ' ', $offset, $encoding),
                    self::strpos($str, '.', $offset, $encoding)
                ) - $pos_start;

            if ($pos_end <= 0) {
                // no usable end position: take everything from the window start
                $str_sub = $is_utf8
                    ? \mb_substr($str, $pos_start, (int) \mb_strlen($str))
                    : self::substr($str, $pos_start, (int) self::strlen($str, $encoding), $encoding);

                if ($str_sub === false) {
                    return '';
                }

                return $replacer_for_skipped_text . \ltrim($str_sub, $trim_chars);
            }

            $str_sub = $is_utf8
                ? \mb_substr($str, $pos_start, $pos_end)
                : self::substr($str, $pos_start, $pos_end, $encoding);

            if ($str_sub === false) {
                return '';
            }

            return $replacer_for_skipped_text . \trim($str_sub, $trim_chars) . $replacer_for_skipped_text;
        }

        // the window starts at the beginning of the text
        $true_length = (int) self::strlen($str, $encoding);
        $offset = \min($length - 1, $true_length);

        $pos_end = $is_utf8
            ? (int) \min(
                \mb_strpos($str, ' ', $offset),
                \mb_strpos($str, '.', $offset)
            )
            : (int) \min(
                self::strpos($str, ' ', $offset, $encoding),
                self::strpos($str, '.', $offset, $encoding)
            );

        if (!$pos_end) {
            return $str;
        }

        $str_sub = $is_utf8
            ? \mb_substr($str, 0, $pos_end)
            : self::substr($str, 0, $pos_end, $encoding);

        if ($str_sub === false) {
            return '';
        }

        return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
    }
1453
1454
    /**
1455
     * Reads entire file into a string.
1456
     *
1457
     * WARNING: Do not use UTF-8 Option ($convert_to_utf8) for binary files (e.g.: images) !!!
1458
     *
1459
     * @see http://php.net/manual/en/function.file-get-contents.php
1460
     *
1461
     * @param string        $filename         <p>
1462
     *                                        Name of the file to read.
1463
     *                                        </p>
1464
     * @param bool          $use_include_path [optional] <p>
1465
     *                                        Prior to PHP 5, this parameter is called
1466
     *                                        use_include_path and is a bool.
1467
     *                                        As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1468
     *                                        to trigger include path
1469
     *                                        search.
1470
     *                                        </p>
1471
     * @param resource|null $context          [optional] <p>
1472
     *                                        A valid context resource created with
1473
     *                                        stream_context_create. If you don't need to use a
1474
     *                                        custom context, you can skip this parameter by &null;.
1475
     *                                        </p>
1476
     * @param int|null      $offset           [optional] <p>
1477
     *                                        The offset where the reading starts.
1478
     *                                        </p>
1479
     * @param int|null      $max_length       [optional] <p>
1480
     *                                        Maximum length of data read. The default is to read until end
1481
     *                                        of file is reached.
1482
     *                                        </p>
1483
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
1484
     * @param bool          $convert_to_utf8  <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
1485
     *                                        some files, because they used non default utf-8 chars. Binary files
1486
     *                                        like images or pdf will not be converted.</p>
1487
     * @param string        $from_encoding    [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
1488
     *                                        An empty string will trigger the autodetect anyway.</p>
1489
     *
1490
     * @return false|string
1491
     *                      <p>The function returns the read data as string or <b>false</b> on failure.</p>
1492
     */
1493 12
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $max_length = null,
        int $timeout = 10,
        bool $convert_to_utf8 = true,
        string $from_encoding = ''
    ) {
        // Sanitize the name first; when the filter itself fails there is nothing to read.
        // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 - confirm the
        // supported PHP versions before relying on it.
        $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);
        /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
        if ($filename === false) {
            return false;
        }

        // Build a timeout-only context, but never override a context passed by the caller.
        if ($context === null && $timeout) {
            $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
        }

        // A missing offset means "start at the beginning".
        $start = $offset ?? 0;

        // Only forward the length argument when the caller really supplied one.
        $data = \is_int($max_length) === true
            ? \file_get_contents($filename, $use_include_path, $context, $start, $max_length)
            : \file_get_contents($filename, $use_include_path, $context, $start);

        // Propagate a read failure unchanged.
        if ($data === false) {
            return false;
        }

        if ($convert_to_utf8 !== true) {
            return $data;
        }

        // Convert when the data is not detected as binary, or when it is detected as
        // UTF-16 / UTF-32; the checks run in this order and stop at the first hit.
        $needs_conversion = self::is_binary($data, true) !== true
            || self::is_utf16($data, false) !== false
            || self::is_utf32($data, false) !== false;

        if ($needs_conversion) {
            $data = self::encode('UTF-8', $data, false, $from_encoding);
            $data = self::cleanup($data);
        }

        return $data;
    }
1550
1551
    /**
1552
     * Checks if a file starts with BOM (Byte Order Mark) character.
1553
     *
1554
     * @param string $file_path <p>Path to a valid file.</p>
1555
     *
1556
     * @throws \RuntimeException if file_get_contents() returned false
1557
     *
1558
     * @return bool
1559
     *              <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p>
1560
     */
1561 2
    public static function file_has_bom(string $file_path): bool
    {
        // Read the file with the native function; the BOM check works on the content string.
        $contents = \file_get_contents($file_path);

        if ($contents !== false) {
            return self::string_has_bom($contents);
        }

        // The read failed, so report it instead of guessing a result.
        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1570
1571
    /**
1572
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1573
     *
1574
     * @param mixed  $var
1575
     * @param int    $normalization_form
1576
     * @param string $leading_combining
1577
     *
1578
     * @return mixed
1579
     */
1580 62
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        $kind = \gettype($var);

        // Arrays and objects: filter every value the loop yields, in place and recursively.
        if ($kind === 'array' || $kind === 'object') {
            /** @noinspection ForeachSourceInspection */
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            unset($item);

            return $var;
        }

        // Everything that is neither a container nor a string is returned untouched.
        if ($kind !== 'string') {
            return $var;
        }

        if (\strpos($var, "\r") !== false) {
            // Workaround https://bugs.php.net/65732
            $var = self::normalize_line_ending($var);
        }

        // Pure ASCII needs no normalization at all.
        if (ASCII::is_ascii($var) !== false) {
            return $var;
        }

        // '-' is only a non-empty marker meaning "the input was already normalized".
        $normalized = '-';
        if (!\Normalizer::isNormalized($var, $normalization_form)) {
            $normalized = \Normalizer::normalize($var, $normalization_form);

            // An unusable normalizer result falls back to a forced re-encode to UTF-8.
            $var = isset($normalized[0])
                ? $normalized
                : self::encode('UTF-8', $var, true);
        }

        $starts_with_combining_mark = $var[0] >= "\x80"
            && isset($normalized[0], $leading_combining[0])
            && \preg_match('/^\\p{Mn}/u', $var);

        if ($starts_with_combining_mark) {
            // Prevent leading combining chars
            // for NFC-safe concatenations.
            $var = $leading_combining . $var;
        }

        return $var;
    }
1640
1641
    /**
1642
     * "filter_input()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1643
     *
1644
     * Gets a specific external variable by name and optionally filters it
1645
     *
1646
     * @see http://php.net/manual/en/function.filter-input.php
1647
     *
1648
     * @param int    $type          <p>
1649
     *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1650
     *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1651
     *                              <b>INPUT_ENV</b>.
1652
     *                              </p>
1653
     * @param string $variable_name <p>
1654
     *                              Name of a variable to get.
1655
     *                              </p>
1656
     * @param int    $filter        [optional] <p>
1657
     *                              The ID of the filter to apply. The
1658
     *                              manual page lists the available filters.
1659
     *                              </p>
1660
     * @param mixed  $options       [optional] <p>
1661
     *                              Associative array of options or bitwise disjunction of flags. If filter
1662
     *                              accepts options, flags can be provided in "flags" field of array.
1663
     *                              </p>
1664
     *
1665
     * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
1666
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1667
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1668
     */
1669
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // Forward $options only when the caller passed it explicitly, so the native
        // function sees exactly the arguments that were given.
        $value = \func_num_args() >= 4
            ? \filter_input($type, $variable_name, $filter, $options)
            : \filter_input($type, $variable_name, $filter);

        // Run the native result through the class' own filter() before returning it.
        return self::filter($value);
    }
1683
1684
    /**
1685
     * "filter_input_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1686
     *
1687
     * Gets external variables and optionally filters them
1688
     *
1689
     * @see http://php.net/manual/en/function.filter-input-array.php
1690
     *
1691
     * @param int   $type       <p>
1692
     *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1693
     *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1694
     *                          <b>INPUT_ENV</b>.
1695
     *                          </p>
1696
     * @param mixed $definition [optional] <p>
1697
     *                          An array defining the arguments. A valid key is a string
1698
     *                          containing a variable name and a valid value is either a filter type, or an array
1699
     *                          optionally specifying the filter, flags and options. If the value is an
1700
     *                          array, valid keys are filter which specifies the
1701
     *                          filter type,
1702
     *                          flags which specifies any flags that apply to the
1703
     *                          filter, and options which specifies any options that
1704
     *                          apply to the filter. See the example below for a better understanding.
1705
     *                          </p>
1706
     *                          <p>
1707
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1708
     *                          input array are filtered by this filter.
1709
     *                          </p>
1710
     * @param bool  $add_empty  [optional] <p>
1711
     *                          Add missing keys as <b>NULL</b> to the return value.
1712
     *                          </p>
1713
     *
1714
     * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1715
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1716
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
1717
     *               is not set and <b>NULL</b> if the filter fails.
1718
     */
1719
    public static function filter_input_array(
        int $type,
        $definition = null,
        bool $add_empty = true
    ) {
        // With only $type given, call the native function without the optional arguments.
        $values = \func_num_args() >= 2
            ? \filter_input_array($type, $definition, $add_empty)
            : \filter_input_array($type);

        // Run the native result through the class' own filter() before returning it.
        return self::filter($values);
    }
1732
1733
    /**
1734
     * "filter_var()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1735
     *
1736
     * Filters a variable with a specified filter
1737
     *
1738
     * @see http://php.net/manual/en/function.filter-var.php
1739
     *
1740
     * @param mixed $variable <p>
1741
     *                        Value to filter.
1742
     *                        </p>
1743
     * @param int   $filter   [optional] <p>
1744
     *                        The ID of the filter to apply. The
1745
     *                        manual page lists the available filters.
1746
     *                        </p>
1747
     * @param mixed $options  [optional] <p>
1748
     *                        Associative array of options or bitwise disjunction of flags. If filter
1749
     *                        accepts options, flags can be provided in "flags" field of array. For
1750
     *                        the "callback" filter, callable type should be passed. The
1751
     *                        callback must accept one argument, the value to be filtered, and return
1752
     *                        the value after filtering/sanitizing it.
1753
     *                        </p>
1754
     *                        <p>
1755
     *                        <code>
1756
     *                        // for filters that accept options, use this format
1757
     *                        $options = array(
1758
     *                        'options' => array(
1759
     *                        'default' => 3, // value to return if the filter fails
1760
     *                        // other options here
1761
     *                        'min_range' => 0
1762
     *                        ),
1763
     *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1764
     *                        );
1765
     *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1766
     *                        // for filter that only accept flags, you can pass them directly
1767
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1768
     *                        // for filter that only accept flags, you can also pass as an array
1769
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1770
     *                        array('flags' => FILTER_NULL_ON_FAILURE));
1771
     *                        // callback validate filter
1772
     *                        function foo($value)
1773
     *                        {
1774
     *                        // Expected format: Surname, GivenNames
1775
     *                        if (strpos($value, ", ") === false) return false;
1776
     *                        list($surname, $givennames) = explode(", ", $value, 2);
1777
     *                        $empty = (empty($surname) || empty($givennames));
1778
     *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1779
     *                        if ($empty || $notstrings) {
1780
     *                        return false;
1781
     *                        } else {
1782
     *                        return $value;
1783
     *                        }
1784
     *                        }
1785
     *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1786
     *                        </code>
1787
     *                        </p>
1788
     *
1789
     * @return mixed the filtered data, or <b>FALSE</b> if the filter fails
1790
     */
1791 2
    public static function filter_var(
        $variable,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // Forward $options only when the caller passed it explicitly.
        $filtered = \func_num_args() >= 3
            ? \filter_var($variable, $filter, $options)
            : \filter_var($variable, $filter);

        // Run the native result through the class' own filter() before returning it.
        return self::filter($filtered);
    }
1804
1805
    /**
1806
     * "filter_var_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1807
     *
1808
     * Gets multiple variables and optionally filters them
1809
     *
1810
     * @see http://php.net/manual/en/function.filter-var-array.php
1811
     *
1812
     * @param array $data       <p>
1813
     *                          An array with string keys containing the data to filter.
1814
     *                          </p>
1815
     * @param mixed $definition [optional] <p>
1816
     *                          An array defining the arguments. A valid key is a string
1817
     *                          containing a variable name and a valid value is either a
1818
     *                          filter type, or an
1819
     *                          array optionally specifying the filter, flags and options.
1820
     *                          If the value is an array, valid keys are filter
1821
     *                          which specifies the filter type,
1822
     *                          flags which specifies any flags that apply to the
1823
     *                          filter, and options which specifies any options that
1824
     *                          apply to the filter. See the example below for a better understanding.
1825
     *                          </p>
1826
     *                          <p>
1827
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1828
     *                          input array are filtered by this filter.
1829
     *                          </p>
1830
     * @param bool  $add_empty  [optional] <p>
1831
     *                          Add missing keys as <b>NULL</b> to the return value.
1832
     *                          </p>
1833
     *
1834
     * @return mixed an array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1835
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1836
     *               set
1837
     */
1838 2
    public static function filter_var_array(
        array $data,
        $definition = null,
        bool $add_empty = true
    ) {
        // With only $data given, call the native function without the optional arguments.
        $values = \func_num_args() >= 2
            ? \filter_var_array($data, $definition, $add_empty)
            : \filter_var_array($data);

        // Run the native result through the class' own filter() before returning it.
        return self::filter($values);
    }
1851
1852
    /**
1853
     * Checks whether finfo is available on the server.
1854
     *
1855
     * @return bool
1856
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
1857
     */
1858
    public static function finfo_loaded(): bool
    {
        // The check is simply whether a class named "finfo" exists.
        $has_finfo = \class_exists('finfo');

        return $has_finfo;
    }
1862
1863
    /**
1864
     * Returns the first $n characters of the string.
1865
     *
1866
     * @param string $str      <p>The input string.</p>
1867
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
1868
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1869
     *
1870
     * @return string
1871
     */
1872 13
    public static function first_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to take from an empty string or for a non-positive count.
        if ($n <= 0 || $str === '') {
            return '';
        }

        // UTF-8 goes straight to mb_substr(); any other charset uses the class' own substr().
        $head = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $head;
    }
1887
1888
    /**
1889
     * Check if the number of Unicode characters isn't greater than the specified integer.
1890
     *
1891
     * @param string $str      the original string to be checked
1892
     * @param int    $box_size the size in number of chars to be checked against string
1893
     *
1894
     * @return bool true if string is less than or equal to $box_size, false otherwise
1895
     */
1896 2
    public static function fits_inside(string $str, int $box_size): bool
    {
        // Character count as reported by the class' own strlen(), forced to int.
        $char_count = (int) self::strlen($str);

        return $char_count <= $box_size;
    }
1900
1901
    /**
1902
     * Try to fix simple broken UTF-8 strings.
1903
     *
1904
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1905
     *
1906
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1907
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1908
     * See: http://en.wikipedia.org/wiki/Windows-1252
1909
     *
1910
     * @param string $str <p>The input string</p>
1911
     *
1912
     * @return string
1913
     */
1914 46
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // Search / replace lists are derived once and then kept for later calls.
        static $search = null;
        static $replace = null;

        if ($search === null) {
            // Load the replacement map on first use.
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $search = \array_keys(self::$BROKEN_UTF8_FIX);
            $replace = \array_values(self::$BROKEN_UTF8_FIX);
        }

        return \str_replace($search, $replace, $str);
    }
1934
1935
    /**
1936
     * Fix a double (or multiple) encoded UTF8 string.
1937
     *
1938
     * @param string|string[] $str you can use a string or an array of strings
1939
     *
1940
     * @return string|string[]
1941
     *                         Will return the fixed input-"array" or
1942
     *                         the fixed input-"string"
1943
     *
1944
     * @psalm-suppress InvalidReturnType
1945
     */
1946 2
    public static function fix_utf8($str)
    {
        // Arrays are fixed element by element; keys are left as they are.
        if (\is_array($str) === true) {
            foreach (\array_keys($str) as $key) {
                $str[$key] = self::fix_utf8($str[$key]);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $str = (string) $str;

        // An empty string has nothing to decode.
        if ($str === '') {
            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        // Decode and re-encode until a pass no longer changes the string.
        do {
            $previous = $str;
            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::to_utf8(
                self::utf8_decode($str, true)
            );
        } while ($previous !== $str);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $str;
    }
1977
1978
    /**
1979
     * Get the text direction of a specific character.
1980
     *
1981
     * @param string $char
1982
     *
1983
     * @return string 'RTL' or 'LTR'
1984
     */
1985 2
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_direction = \IntlChar::charDirection($char);

            // direction codes from the "IntlChar"-Class
            $ltr_codes = [0, 11, 12, 20];
            $rtl_codes = [1, 13, 14, 15, 21];

            if (\in_array($intl_direction, $ltr_codes, true)) {
                return 'LTR';
            }

            if (\in_array($intl_direction, $rtl_codes, true)) {
                return 'RTL';
            }

            // Any other code falls through to the table lookup below.
        }

        $c = static::chr_to_decimal($char);

        // Only code points inside 0x5be..0x10b7f can be reported as right-to-left.
        $inside_candidate_span = $c >= 0x5be && $c <= 0x10b7f;
        if (!$inside_candidate_span) {
            return 'LTR';
        }

        if ($c <= 0x85e) {
            // lower block: 0x5be..0x85e
            $rtl_points = [
                0x5be, 0x5c0, 0x5c3, 0x5c6,
                0x608, 0x60b, 0x60d, 0x61b,
                0x710, 0x7b1, 0x7fa,
                0x81a, 0x824, 0x828, 0x85e,
            ];
            $rtl_ranges = [
                [0x5d0, 0x5ea],
                [0x5f0, 0x5f4],
                [0x61e, 0x64a],
                [0x66d, 0x66f],
                [0x671, 0x6d5],
                [0x6e5, 0x6e6],
                [0x6ee, 0x6ef],
                [0x6fa, 0x70d],
                [0x712, 0x72f],
                [0x74d, 0x7a5],
                [0x7c0, 0x7ea],
                [0x7f4, 0x7f5],
                [0x800, 0x815],
                [0x830, 0x83e],
                [0x840, 0x858],
            ];
        } elseif ($c === 0x200f) {
            return 'RTL';
        } elseif ($c >= 0xfb1d) {
            // upper block: 0xfb1d..0x10b7f
            $rtl_points = [
                0xfb1d, 0xfb3e,
                0x10808, 0x1083c, 0x1093f, 0x10a00,
            ];
            $rtl_ranges = [
                [0xfb1f, 0xfb28],
                [0xfb2a, 0xfb36],
                [0xfb38, 0xfb3c],
                [0xfb40, 0xfb41],
                [0xfb43, 0xfb44],
                [0xfb46, 0xfbc1],
                [0xfbd3, 0xfd3d],
                [0xfd50, 0xfd8f],
                [0xfd92, 0xfdc7],
                [0xfdf0, 0xfdfc],
                [0xfe70, 0xfe74],
                [0xfe76, 0xfefc],
                [0x10800, 0x10805],
                [0x1080a, 0x10835],
                [0x10837, 0x10838],
                [0x1083f, 0x10855],
                [0x10857, 0x1085f],
                [0x10900, 0x1091b],
                [0x10920, 0x10939],
                [0x10a10, 0x10a13],
                [0x10a15, 0x10a17],
                [0x10a19, 0x10a33],
                [0x10a40, 0x10a47],
                [0x10a50, 0x10a58],
                [0x10a60, 0x10a7f],
                [0x10b00, 0x10b35],
                [0x10b40, 0x10b55],
                [0x10b58, 0x10b72],
                [0x10b78, 0x10b7f],
            ];
        } else {
            // Between the two blocks only 0x200f is right-to-left.
            return 'LTR';
        }

        // Exact code points are compared strictly, ranges inclusively on both ends.
        if (\in_array($c, $rtl_points, true)) {
            return 'RTL';
        }

        foreach ($rtl_ranges as $range) {
            if ($c >= $range[0] && $c <= $range[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2091
2092
    /**
2093
     * Check for php-support.
2094
     *
2095
     * @param string|null $key
2096
     *
2097
     * @return mixed
2098
     *               Return the full support-"array", if $key === null<br>
2099
     *               return bool-value, if $key is used and available<br>
2100
     *               otherwise return <strong>null</strong>
2101
     */
2102 27
    public static function getSupportInfo(string $key = null)
    {
        // Without a key the caller gets the complete support array.
        if ($key === null) {
            return self::$SUPPORT;
        }

        // Load the transliterator list on first use only.
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        // compatibility fix for old versions
        self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

        // Keys that are missing (or hold null) yield null.
        if (isset(self::$SUPPORT[$key])) {
            return self::$SUPPORT[$key];
        }

        return null;
    }
2116
2117
    /**
2118
     * Warning: this method only works for some file-types (png, jpg)
2119
     *          if you need more supported types, please use e.g. "finfo"
2120
     *
2121
     * @param string $str
2122
     * @param array  $fallback with these keys: 'ext', 'mime', 'type'
2123
     *
2124
     * @return array
2125
     *               with these keys: 'ext', 'mime', 'type'
2126
     */
2127 39
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        // The detection needs the first two bytes; shorter input gets the fallback.
        $head = $str === '' ? false : \substr($str, 0, 2);
        if ($head === false || \strlen($head) !== 2) {
            return $fallback;
        }

        $bytes = \unpack('C2chars', $head);
        if ($bytes === false) {
            return $fallback;
        }

        // The code is the decimal values of both bytes written next to each other,
        // e.g. 0xFF 0xD8 => "255" . "216" => 255216.
        /** @noinspection OffsetOperationsInspection */
        $signature = (int) ($bytes['chars1'] . $bytes['chars2']);

        //
        // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
        //
        // WARNING: do not add too simple comparisons, because of false-positive results:
        //
        // 3780 => 'pdf', 7790 => 'exe', 7784 => 'midi', 8075 => 'zip',
        // 8297 => 'rar', 7173 => 'gif', 7373 => 'tiff' 6677 => 'bmp', ...
        //
        $known_signatures = [
            255216 => [
                'ext'  => 'jpg',
                'mime' => 'image/jpeg',
                'type' => 'binary',
            ],
            13780 => [
                'ext'  => 'png',
                'mime' => 'image/png',
                'type' => 'binary',
            ],
        ];

        return $known_signatures[$signature] ?? $fallback;
    }
2188
2189
    /**
2190
     * @param int    $length         <p>Length of the random string.</p>
2191
     * @param string $possible_chars [optional] <p>Characters string for the random selection.</p>
2192
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
2193
     *
2194
     * @return string
2195
     */
2196 1
    public static function get_random_string(
        int $length,
        string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        string $encoding = 'UTF-8'
    ): string {
        // The path is chosen on the encoding exactly as passed in, before any normalization.
        $use_mb_functions = $encoding === 'UTF-8';

        if ($use_mb_functions) {
            $pool_size = (int) \mb_strlen($possible_chars);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $pool_size = (int) self::strlen($possible_chars, $encoding);
        }

        // Without any candidate characters there is nothing to pick from.
        if ($pool_size === 0) {
            return '';
        }

        $result = '';

        //
        // add random chars
        //
        for ($picked = 0; $picked < $length;) {
            try {
                $index = \random_int(0, $pool_size - 1);
            } catch (\Exception $e) {
                // random_int() failed, so use mt_rand() for this pick instead
                /** @noinspection RandomApiMigrationInspection */
                $index = \mt_rand(0, $pool_size - 1);
            }

            $char = $use_mb_functions
                ? \mb_substr($possible_chars, $index, 1)
                : self::substr($possible_chars, $index, 1, $encoding);

            // Only a successful pick counts towards the requested length.
            if ($char !== false) {
                $result .= $char;
                ++$picked;
            }
        }

        return $result;
    }
2253
2254
    /**
     * Build a unique string from a random number, the session id, the remote / server
     * address (if set in $_SERVER), the given extra entropy and "uniqid()".
     *
     * INFO: the md5-hash is only used to get a fixed-length identifier, do not use the result as a secret.
     *
     * @param int|string $entropy_extra [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $use_md5       [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     */
    public static function get_unique_string($entropy_extra = '', bool $use_md5 = true): string
    {
        // same fallback as in "UTF8::get_random_string()", so that a missing entropy source does not throw
        try {
            $random = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            /** @noinspection RandomApiMigrationInspection */
            $random = \mt_rand(0, \mt_getrandmax());
        }

        $unique_helper = $random .
                         \session_id() .
                         ($_SERVER['REMOTE_ADDR'] ?? '') .
                         ($_SERVER['SERVER_ADDR'] ?? '') .
                         $entropy_extra;

        $unique_string = \uniqid($unique_helper, true);

        if ($use_md5) {
            $unique_string = \md5($unique_string . $unique_helper);
        }

        return $unique_string;
    }
2276
2277
    /**
     * Alias of "UTF8::string_has_bom()": the call is forwarded unchanged.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>The result of "UTF8::string_has_bom()".</p>
     *
     * @see UTF8::string_has_bom()
     * @deprecated <p>please use "UTF8::string_has_bom()"</p>
     */
    public static function hasBom(string $str): bool
    {
        $has_bom = self::string_has_bom($str);

        return $has_bom;
    }
2291
2292
    /**
     * Check if the string contains at least one lower case character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not the string contains a lower case character.</p>
     */
    public static function has_lowercase(string $str): bool
    {
        $pattern = '.*[[:lower:]]';

        // without mbstring we need to use the internal pattern helper
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2308
2309
    /**
     * Check if the string contains at least one upper case character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not the string contains an upper case character.</p>
     */
    public static function has_uppercase(string $str): bool
    {
        $pattern = '.*[[:upper:]]';

        // without mbstring we need to use the internal pattern helper
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2325
2326
    /**
     * Convert a hexadecimal value into a character: the value is converted via
     * "hexdec()" and then passed to "UTF8::decimal_to_chr()".
     *
     * @param string $hexdec <p>The hexadecimal value.</p>
     *
     * @return false|string
     *                      <p>One single UTF-8 character.</p>
     */
    public static function hex_to_chr(string $hexdec)
    {
        $decimal = \hexdec($hexdec);

        return self::decimal_to_chr($decimal);
    }
2337
2338
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * Accepted input: 4 to 6 hexadecimal digits, optionally prefixed by "\u" or "U+",
     * e.g. "U+00f1", "\u00f1" or "00F1".
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * @param string $hexdec <p>The hexadecimal code point representation.</p>
     *
     * @return false|int
     *                   <p>The code point, or false on failure (empty or malformed input).</p>
     */
    public static function hex_to_int($hexdec)
    {
        // init
        $hexdec = (string) $hexdec;

        if ($hexdec === '') {
            return false;
        }

        // only real hex digits are allowed here, otherwise "intval()" would silently
        // stop at the first invalid char (e.g. "zzzz" => 0, "12zz" => 18)
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexdec, $match) === 1) {
            return \intval($match[1], 16);
        }

        return false;
    }
2362
2363
    /**
     * Alias of "UTF8::html_entity_decode()": all arguments are forwarded unchanged.
     *
     * @param string   $str      <p>The input string.</p>
     * @param int|null $flags    [optional] <p>Passed through to "UTF8::html_entity_decode()".</p>
     * @param string   $encoding [optional] <p>Passed through to "UTF8::html_entity_decode()".</p>
     *
     * @return string
     *
     * @see UTF8::html_entity_decode()
     */
    public static function html_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2381
2382
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * @param string $str              <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keep_ascii_chars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>HTML numbered entities.</p>
     */
    public static function html_encode(
        string $str,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // with "$keep_ascii_chars" only code points >= 0x80 are converted
            $start_code = $keep_ascii_chars === true ? 0x80 : 0x00;

            // the convmap needs exactly four values per range (start, end, offset, mask),
            // PHP >= 8.0 rejects a map that is not a multiple of four
            $return = \mb_encode_numericentity(
                $str,
                [$start_code, 0xfffff, 0, 0xfffff],
                $encoding
            );

            if ($return !== null && $return !== false) {
                return $return;
            }
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keep_ascii_chars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2449
2450
    /**
2451
     * UTF-8 version of html_entity_decode()
2452
     *
2453
     * The reason we are not using html_entity_decode() by itself is because
2454
     * while it is not technically correct to leave out the semicolon
2455
     * at the end of an entity most browsers will still interpret the entity
2456
     * correctly. html_entity_decode() does not convert entities without
2457
     * semicolons, so we are left with our own little solution here. Bummer.
2458
     *
2459
     * Convert all HTML entities to their applicable characters
2460
     *
2461
     * INFO: opposite to UTF8::html_encode()
2462
     *
2463
     * @see http://php.net/manual/en/function.html-entity-decode.php
2464
     *
2465
     * @param string $str      <p>
2466
     *                         The input string.
2467
     *                         </p>
2468
     * @param int    $flags    [optional] <p>
2469
     *                         A bitmask of one or more of the following flags, which specify how to handle quotes
2470
     *                         and which document type to use. If null is passed, ENT_QUOTES | ENT_HTML5 is used.
2471
     *                         <table>
2472
     *                         Available <i>flags</i> constants
2473
     *                         <tr valign="top">
2474
     *                         <td>Constant Name</td>
2475
     *                         <td>Description</td>
2476
     *                         </tr>
2477
     *                         <tr valign="top">
2478
     *                         <td><b>ENT_COMPAT</b></td>
2479
     *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2480
     *                         </tr>
2481
     *                         <tr valign="top">
2482
     *                         <td><b>ENT_QUOTES</b></td>
2483
     *                         <td>Will convert both double and single quotes.</td>
2484
     *                         </tr>
2485
     *                         <tr valign="top">
2486
     *                         <td><b>ENT_NOQUOTES</b></td>
2487
     *                         <td>Will leave both double and single quotes unconverted.</td>
2488
     *                         </tr>
2489
     *                         <tr valign="top">
2490
     *                         <td><b>ENT_HTML401</b></td>
2491
     *                         <td>
2492
     *                         Handle code as HTML 4.01.
2493
     *                         </td>
2494
     *                         </tr>
2495
     *                         <tr valign="top">
2496
     *                         <td><b>ENT_XML1</b></td>
2497
     *                         <td>
2498
     *                         Handle code as XML 1.
2499
     *                         </td>
2500
     *                         </tr>
2501
     *                         <tr valign="top">
2502
     *                         <td><b>ENT_XHTML</b></td>
2503
     *                         <td>
2504
     *                         Handle code as XHTML.
2505
     *                         </td>
2506
     *                         </tr>
2507
     *                         <tr valign="top">
2508
     *                         <td><b>ENT_HTML5</b></td>
2509
     *                         <td>
2510
     *                         Handle code as HTML 5.
2511
     *                         </td>
2512
     *                         </tr>
2513
     *                         </table>
2514
     *                         </p>
2515
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2516
     *
2517
     * @return string the decoded string
2518
     */
2519 46
    public static function html_entity_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // fast path: too short for an entity or no "&" at all
        if (
            !isset($str[3]) // examples: &; || &x;
            ||
            \strpos($str, '&') === false // no "&"
        ) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // default flags, if nothing (null) was passed
        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // decode until the string does not change anymore (handles nested encodings like "&amp;lt;")
        do {
            $str_compare = $str;

            // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
            if (self::$SUPPORT['mbstring'] === true) {
                // the convmap needs exactly four values per range (start, end, offset, mask),
                // PHP >= 8.0 rejects a map that is not a multiple of four
                $strTmp = \mb_decode_numericentity(
                    $str,
                    [0x80, 0xfffff, 0, 0xfffff],
                    $encoding
                );

                // NOTE(review): a successful result is not written back to "$str", only a failure
                // switches to the helper. Kept as it was to preserve the existing output - confirm
                // whether this is intended.
                if ($strTmp === null || $strTmp === false) {
                    $str = self::html_entity_decode_helper($str, $encoding);
                }
            } else {
                $str = self::html_entity_decode_helper($str, $encoding);
            }

            if (\strpos($str, '&') !== false) {
                if (\strpos($str, '&#') !== false) {
                    // decode also numeric & UTF16 two byte entities
                    // (append the missing ";" so that "html_entity_decode()" accepts them)
                    $str = (string) \preg_replace(
                        '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                        '$1;',
                        $str
                    );
                }

                $str = \html_entity_decode(
                    $str,
                    $flags,
                    $encoding
                );
            }
        } while ($str_compare !== $str);

        return $str;
    }
2597
2598
    /**
     * Escape a string for html via "UTF8::htmlspecialchars()",
     * using the flags ENT_QUOTES | ENT_SUBSTITUTE.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The escaped string.</p>
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        $escape_flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $escape_flags, $encoding);
    }
2614
2615
    /**
     * Remove empty html-tag.
     *
     * e.g.: <tag></tag>
     *
     * INFO: only whitespace is allowed between the opening and the closing tag.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *                <p>The string without empty tags, or the unchanged input if the regex fails
     *                (e.g. invalid UTF-8 input).</p>
     */
    public static function html_stripe_empty_tags(string $str): string
    {
        $result = \preg_replace(
            '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u',
            '',
            $str
        );

        // "preg_replace()" returns null on error, a string-cast would silently drop the whole input
        return $result ?? $str;
    }
2632
2633
    /**
2634
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2635
     *
2636
     * @see http://php.net/manual/en/function.htmlentities.php
2637
     *
2638
     * @param string $str           <p>
2639
     *                              The input string.
2640
     *                              </p>
2641
     * @param int    $flags         [optional] <p>
2642
     *                              A bitmask of one or more of the following flags, which specify how to handle
2643
     *                              quotes, invalid code unit sequences and the used document type. The default is
2644
     *                              ENT_COMPAT | ENT_HTML401.
2645
     *                              <table>
2646
     *                              Available <i>flags</i> constants
2647
     *                              <tr valign="top">
2648
     *                              <td>Constant Name</td>
2649
     *                              <td>Description</td>
2650
     *                              </tr>
2651
     *                              <tr valign="top">
2652
     *                              <td><b>ENT_COMPAT</b></td>
2653
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2654
     *                              </tr>
2655
     *                              <tr valign="top">
2656
     *                              <td><b>ENT_QUOTES</b></td>
2657
     *                              <td>Will convert both double and single quotes.</td>
2658
     *                              </tr>
2659
     *                              <tr valign="top">
2660
     *                              <td><b>ENT_NOQUOTES</b></td>
2661
     *                              <td>Will leave both double and single quotes unconverted.</td>
2662
     *                              </tr>
2663
     *                              <tr valign="top">
2664
     *                              <td><b>ENT_IGNORE</b></td>
2665
     *                              <td>
2666
     *                              Silently discard invalid code unit sequences instead of returning
2667
     *                              an empty string. Using this flag is discouraged as it
2668
     *                              may have security implications.
2669
     *                              </td>
2670
     *                              </tr>
2671
     *                              <tr valign="top">
2672
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2673
     *                              <td>
2674
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2675
     *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty
2676
     *                              string.
2677
     *                              </td>
2678
     *                              </tr>
2679
     *                              <tr valign="top">
2680
     *                              <td><b>ENT_DISALLOWED</b></td>
2681
     *                              <td>
2682
     *                              Replace invalid code points for the given document type with a
2683
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2684
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2685
     *                              instance, to ensure the well-formedness of XML documents with
2686
     *                              embedded external content.
2687
     *                              </td>
2688
     *                              </tr>
2689
     *                              <tr valign="top">
2690
     *                              <td><b>ENT_HTML401</b></td>
2691
     *                              <td>
2692
     *                              Handle code as HTML 4.01.
2693
     *                              </td>
2694
     *                              </tr>
2695
     *                              <tr valign="top">
2696
     *                              <td><b>ENT_XML1</b></td>
2697
     *                              <td>
2698
     *                              Handle code as XML 1.
2699
     *                              </td>
2700
     *                              </tr>
2701
     *                              <tr valign="top">
2702
     *                              <td><b>ENT_XHTML</b></td>
2703
     *                              <td>
2704
     *                              Handle code as XHTML.
2705
     *                              </td>
2706
     *                              </tr>
2707
     *                              <tr valign="top">
2708
     *                              <td><b>ENT_HTML5</b></td>
2709
     *                              <td>
2710
     *                              Handle code as HTML 5.
2711
     *                              </td>
2712
     *                              </tr>
2713
     *                              </table>
2714
     *                              </p>
2715
     * @param string $encoding      [optional] <p>
2716
     *                              Like <b>htmlspecialchars</b>,
2717
     *                              <b>htmlentities</b> takes an optional third argument
2718
     *                              <i>encoding</i> which defines encoding used in
2719
     *                              conversion.
2720
     *                              Although this argument is technically optional, you are highly
2721
     *                              encouraged to specify the correct value for your code.
2722
     *                              </p>
2723
     * @param bool   $double_encode [optional] <p>
2724
     *                              When <i>double_encode</i> is turned off PHP will not
2725
     *                              encode existing html entities. The default is to convert everything.
2726
     *                              </p>
2727
     *
2728
     * @return string
2729
     *                <p>
2730
     *                The encoded string.
2731
     *                <br><br>
2732
     *                If the input <i>string</i> contains an invalid code unit
2733
     *                sequence within the given <i>encoding</i> an empty string
2734
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2735
     *                <b>ENT_SUBSTITUTE</b> flags are set.
2736
     *                </p>
2737
     */
2738 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used as they are, everything else is normalized first
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        /**
         * PHP does not convert a backslash into its html entity, so we replace it here
         * with the numeric entity on our own.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace('\\', '&#92;', $encoded);

        // finally convert the remaining non-ASCII chars into numbered entities
        return self::html_encode($encoded, true, $encoding);
    }
2767
2768
    /**
2769
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2770
     *
2771
     * INFO: Take a look at "UTF8::htmlentities()"
2772
     *
2773
     * @see http://php.net/manual/en/function.htmlspecialchars.php
2774
     *
2775
     * @param string $str           <p>
2776
     *                              The string being converted.
2777
     *                              </p>
2778
     * @param int    $flags         [optional] <p>
2779
     *                              A bitmask of one or more of the following flags, which specify how to handle
2780
     *                              quotes, invalid code unit sequences and the used document type. The default is
2781
     *                              ENT_COMPAT | ENT_HTML401.
2782
     *                              <table>
2783
     *                              Available <i>flags</i> constants
2784
     *                              <tr valign="top">
2785
     *                              <td>Constant Name</td>
2786
     *                              <td>Description</td>
2787
     *                              </tr>
2788
     *                              <tr valign="top">
2789
     *                              <td><b>ENT_COMPAT</b></td>
2790
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2791
     *                              </tr>
2792
     *                              <tr valign="top">
2793
     *                              <td><b>ENT_QUOTES</b></td>
2794
     *                              <td>Will convert both double and single quotes.</td>
2795
     *                              </tr>
2796
     *                              <tr valign="top">
2797
     *                              <td><b>ENT_NOQUOTES</b></td>
2798
     *                              <td>Will leave both double and single quotes unconverted.</td>
2799
     *                              </tr>
2800
     *                              <tr valign="top">
2801
     *                              <td><b>ENT_IGNORE</b></td>
2802
     *                              <td>
2803
     *                              Silently discard invalid code unit sequences instead of returning
2804
     *                              an empty string. Using this flag is discouraged as it
2805
     *                              may have security implications.
2806
     *                              </td>
2807
     *                              </tr>
2808
     *                              <tr valign="top">
2809
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2810
     *                              <td>
2811
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2812
     *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty
2813
     *                              string.
2814
     *                              </td>
2815
     *                              </tr>
2816
     *                              <tr valign="top">
2817
     *                              <td><b>ENT_DISALLOWED</b></td>
2818
     *                              <td>
2819
     *                              Replace invalid code points for the given document type with a
2820
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2821
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2822
     *                              instance, to ensure the well-formedness of XML documents with
2823
     *                              embedded external content.
2824
     *                              </td>
2825
     *                              </tr>
2826
     *                              <tr valign="top">
2827
     *                              <td><b>ENT_HTML401</b></td>
2828
     *                              <td>
2829
     *                              Handle code as HTML 4.01.
2830
     *                              </td>
2831
     *                              </tr>
2832
     *                              <tr valign="top">
2833
     *                              <td><b>ENT_XML1</b></td>
2834
     *                              <td>
2835
     *                              Handle code as XML 1.
2836
     *                              </td>
2837
     *                              </tr>
2838
     *                              <tr valign="top">
2839
     *                              <td><b>ENT_XHTML</b></td>
2840
     *                              <td>
2841
     *                              Handle code as XHTML.
2842
     *                              </td>
2843
     *                              </tr>
2844
     *                              <tr valign="top">
2845
     *                              <td><b>ENT_HTML5</b></td>
2846
     *                              <td>
2847
     *                              Handle code as HTML 5.
2848
     *                              </td>
2849
     *                              </tr>
2850
     *                              </table>
2851
     *                              </p>
2852
     * @param string $encoding      [optional] <p>
2853
     *                              Defines encoding used in conversion.
2854
     *                              </p>
2855
     *                              <p>
2856
     *                              For the purposes of this function, the encodings
2857
     *                              ISO-8859-1, ISO-8859-15,
2858
     *                              UTF-8, cp866,
2859
     *                              cp1251, cp1252, and
2860
     *                              KOI8-R are effectively equivalent, provided the
2861
     *                              <i>string</i> itself is valid for the encoding, as
2862
     *                              the characters affected by <b>htmlspecialchars</b> occupy
2863
     *                              the same positions in all of these encodings.
2864
     *                              </p>
2865
     * @param bool   $double_encode [optional] <p>
2866
     *                              When <i>double_encode</i> is turned off PHP will not
2867
     *                              encode existing html entities, the default is to convert everything.
2868
     *                              </p>
2869
     *
2870
     * @return string the converted string.
2871
     *                </p>
2872
     *                <p>
2873
     *                If the input <i>string</i> contains an invalid code unit
2874
     *                sequence within the given <i>encoding</i> an empty string
2875
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2876
     *                <b>ENT_SUBSTITUTE</b> flags are set
2877
     */
2878 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used as they are, everything else is normalized first
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $converted;
    }
2895
2896
    /**
     * Check if the "iconv" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if loaded, <strong>false</strong> otherwise
     */
    public static function iconv_loaded(): bool
    {
        $is_loaded = \extension_loaded('iconv');

        return $is_loaded;
    }
2906
2907
    /**
     * Alias of "UTF8::decimal_to_chr()": the call is forwarded unchanged.
     *
     * @param mixed $int <p>Passed through to "UTF8::decimal_to_chr()".</p>
     *
     * @return string
     *
     * @see UTF8::decimal_to_chr()
     */
    public static function int_to_chr($int): string
    {
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
2920
2921
    /**
     * Converts Integer to hexadecimal U+xxxx code point representation.
     *
     * The hex value is left-padded with "0" up to a length of four, e.g. 241 => "U+00f1".
     *
     * INFO: opposite to UTF8::hex_to_int()
     *
     * @param int    $int    <p>The integer to be converted to hexadecimal code point.</p>
     * @param string $prefix [optional] <p>Prepended to the hex value.</p>
     *
     * @return string
     *                <p>The prefix followed by the (lower case) hex value.</p>
     */
    public static function int_to_hex(int $int, string $prefix = 'U+'): string
    {
        // at least four hex digits, longer values are kept as they are
        $padded_hex = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $prefix . $padded_hex;
    }
2939
2940
    /**
2941
     * Checks whether intl-char is available on the server.
2942
     *
2943
     * @return bool
2944
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2945
     */
2946
    public static function intlChar_loaded(): bool
    {
        // Availability is detected through the existence of the "IntlChar" class.
        $has_intl_char = \class_exists('IntlChar');

        return $has_intl_char;
    }
2950
2951
    /**
2952
     * Checks whether intl is available on the server.
2953
     *
2954
     * @return bool
2955
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2956
     */
2957 5
    public static function intl_loaded(): bool
    {
        // Ask the PHP runtime whether the "intl" extension is loaded.
        $is_loaded = \extension_loaded('intl');

        return $is_loaded;
    }
2961
2962
    /**
2963
     * alias for "UTF8::is_ascii()"
2964
     *
2965
     * @param string $str
2966
     *
2967
     * @return bool
2968
     *
2969
     * @see UTF8::is_ascii()
2970
     * @deprecated <p>please use "UTF8::is_ascii()"</p>
2971
     */
2972 2
    public static function isAscii(string $str): bool
    {
        // Deprecated camelCase alias; the check itself is done by ASCII::is_ascii().
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
2976
2977
    /**
2978
     * alias for "UTF8::is_base64()"
2979
     *
2980
     * @param string $str
2981
     *
2982
     * @return bool
2983
     *
2984
     * @see UTF8::is_base64()
2985
     * @deprecated <p>please use "UTF8::is_base64()"</p>
2986
     */
2987 2
    public static function isBase64($str): bool
    {
        // Deprecated camelCase alias: forwards to is_base64() with its default options.
        $is_base64 = self::is_base64($str);

        return $is_base64;
    }
2991
2992
    /**
2993
     * alias for "UTF8::is_binary()"
2994
     *
2995
     * @param mixed $str
2996
     * @param bool  $strict
2997
     *
2998
     * @return bool
2999
     *
3000
     * @see UTF8::is_binary()
3001
     * @deprecated <p>please use "UTF8::is_binary()"</p>
3002
     */
3003 4
    public static function isBinary($str, $strict = false): bool
    {
        // Deprecated camelCase alias: both arguments are forwarded unchanged to is_binary().
        $is_binary = self::is_binary($str, $strict);

        return $is_binary;
    }
3007
3008
    /**
3009
     * alias for "UTF8::is_bom()"
3010
     *
3011
     * @param string $utf8_chr
3012
     *
3013
     * @return bool
3014
     *
3015
     * @see UTF8::is_bom()
3016
     * @deprecated <p>please use "UTF8::is_bom()"</p>
3017
     */
3018 2
    public static function isBom(string $utf8_chr): bool
    {
        // Deprecated camelCase alias: forwards to is_bom().
        $is_bom = self::is_bom($utf8_chr);

        return $is_bom;
    }
3022
3023
    /**
3024
     * alias for "UTF8::is_html()"
3025
     *
3026
     * @param string $str
3027
     *
3028
     * @return bool
3029
     *
3030
     * @see UTF8::is_html()
3031
     * @deprecated <p>please use "UTF8::is_html()"</p>
3032
     */
3033 2
    public static function isHtml(string $str): bool
    {
        // Deprecated camelCase alias: forwards to is_html().
        $is_html = self::is_html($str);

        return $is_html;
    }
3037
3038
    /**
3039
     * alias for "UTF8::is_json()"
3040
     *
3041
     * @param string $str
3042
     *
3043
     * @return bool
3044
     *
3045
     * @see UTF8::is_json()
3046
     * @deprecated <p>please use "UTF8::is_json()"</p>
3047
     */
3048
    public static function isJson(string $str): bool
    {
        // Deprecated camelCase alias: forwards to is_json() with its default options.
        $is_json = self::is_json($str);

        return $is_json;
    }
3052
3053
    /**
3054
     * alias for "UTF8::is_utf16()"
3055
     *
3056
     * @param mixed $str
3057
     *
3058
     * @return false|int
3059
     *                   <strong>false</strong> if it's not UTF-16,<br>
3060
     *                   <strong>1</strong> for UTF-16LE,<br>
3061
     *                   <strong>2</strong> for UTF-16BE
3062
     *
3063
     * @see UTF8::is_utf16()
3064
     * @deprecated <p>please use "UTF8::is_utf16()"</p>
3065
     */
3066 2
    public static function isUtf16($str)
    {
        // Deprecated camelCase alias: forwards to is_utf16() with its default options
        // and passes the false|int result through unchanged.
        $utf16_result = self::is_utf16($str);

        return $utf16_result;
    }
3070
3071
    /**
3072
     * alias for "UTF8::is_utf32()"
3073
     *
3074
     * @param mixed $str
3075
     *
3076
     * @return false|int
3077
     *                   <strong>false</strong> if it's not UTF-32,
3078
     *                   <strong>1</strong> for UTF-32LE,
3079
     *                   <strong>2</strong> for UTF-32BE
3080
     *
3081
     * @see UTF8::is_utf32()
3082
     * @deprecated <p>please use "UTF8::is_utf32()"</p>
3083
     */
3084 2
    public static function isUtf32($str)
    {
        // Deprecated camelCase alias: forwards to is_utf32() with its default options
        // and passes the false|int result through unchanged.
        $utf32_result = self::is_utf32($str);

        return $utf32_result;
    }
3088
3089
    /**
3090
     * alias for "UTF8::is_utf8()"
3091
     *
3092
     * @param string $str
3093
     * @param bool   $strict
3094
     *
3095
     * @return bool
3096
     *
3097
     * @see UTF8::is_utf8()
3098
     * @deprecated <p>please use "UTF8::is_utf8()"</p>
3099
     */
3100 17
    public static function isUtf8($str, $strict = false): bool
    {
        // Deprecated camelCase alias: both arguments are forwarded unchanged to is_utf8().
        $is_utf8 = self::is_utf8($str, $strict);

        return $is_utf8;
    }
3104
3105
    /**
3106
     * Returns true if the string contains only alphabetic chars, false otherwise.
3107
     *
3108
     * @param string $str
3109
     *
3110
     * @return bool
3111
     *              Whether or not $str contains only alphabetic chars
3112
     */
3113 10
    public static function is_alpha(string $str): bool
    {
        // The whole string (possibly empty) must consist of [[:alpha:]] chars.
        $pattern = '^[[:alpha:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            // mbstring is not flagged as available: use the class' own matcher.
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3122
3123
    /**
3124
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
3125
     *
3126
     * @param string $str
3127
     *
3128
     * @return bool
3129
     *              Whether or not $str contains only alphanumeric chars
3130
     */
3131 13
    public static function is_alphanumeric(string $str): bool
    {
        // The whole string (possibly empty) must consist of [[:alnum:]] chars.
        $pattern = '^[[:alnum:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            // mbstring is not flagged as available: use the class' own matcher.
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3140
3141
    /**
3142
     * Checks if a string is 7 bit ASCII.
3143
     *
3144
     * @param string $str <p>The string to check.</p>
3145
     *
3146
     * @return bool
3147
     *              <strong>true</strong> if it is ASCII<br>
3148
     *              <strong>false</strong> otherwise
3149
     */
3150 8
    public static function is_ascii(string $str): bool
    {
        // The check is delegated entirely to the ASCII helper class.
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
3154
3155
    /**
3156
     * Returns true if the string is base64 encoded, false otherwise.
3157
     *
3158
     * @param mixed|string $str                   <p>The input string.</p>
3159
     * @param bool         $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p>
3160
     *
3161
     * @return bool whether or not $str is base64 encoded
3162
     */
3163 16
    public static function is_base64($str, $empty_string_is_valid = false): bool
    {
        // An empty string only counts as base64 when the caller explicitly allows it.
        if ($str === '' && $empty_string_is_valid === false) {
            return false;
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (!\is_string($str)) {
            return false;
        }

        // Strict decoding rejects characters outside the base64 alphabet ...
        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // ... and the round trip must reproduce the input byte for byte.
        return \base64_encode($decoded) === $str;
    }
3184
3185
    /**
3186
     * Check if the input is binary... (this looks like a hack).
3187
     *
3188
     * @param mixed $input
3189
     * @param bool  $strict
3190
     *
3191
     * @return bool
3192
     */
3193 39
    public static function is_binary($input, bool $strict = false): bool
    {
        $input = (string) $input;
        if ($input === '') {
            return false;
        }

        // A string made up only of the digits "0" and "1" is reported as binary.
        if (\preg_match('~^[01]+$~', $input)) {
            return true;
        }

        // get_file_type() reported the content type as "binary".
        $file_type = self::get_file_type($input);
        if ($file_type['type'] === 'binary') {
            return true;
        }

        // Heuristic: more than a quarter of all bytes are NUL bytes.
        $byte_count = \strlen($input);
        $null_byte_count = \substr_count($input, "\x0", 0, $byte_count);
        if ($null_byte_count / $byte_count > 0.25) {
            return true;
        }

        if ($strict !== true) {
            return false;
        }

        // Strict mode additionally consults ext-fileinfo for the MIME encoding.
        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $mime_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

        return $mime_encoding === 'binary';
    }
3229
3230
    /**
3231
     * Check if the file is binary.
3232
     *
3233
     * @param string $file
3234
     *
3235
     * @return bool
3236
     */
3237 6
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (!\is_resource($handle)) {
            // The file could not be opened: nothing to inspect.
            return false;
        }

        // Only the first 512 bytes of the file are inspected.
        $block = \fread($handle, 512);
        \fclose($handle);

        if ($block === '') {
            return false;
        }

        // The sample is checked with is_binary() in strict mode.
        return self::is_binary($block, true);
    }
3254
3255
    /**
3256
     * Returns true if the string contains only whitespace chars, false otherwise.
3257
     *
3258
     * @param string $str
3259
     *
3260
     * @return bool
3261
     *              Whether or not $str contains only whitespace characters
3262
     */
3263 15
    public static function is_blank(string $str): bool
    {
        // The whole string (possibly empty) must consist of [[:space:]] chars.
        $pattern = '^[[:space:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            // mbstring is not flagged as available: use the class' own matcher.
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3272
3273
    /**
3274
     * Checks if the given string is equal to any "Byte Order Mark".
3275
     *
3276
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
3277
     *
3278
     * @param string $str <p>The input string.</p>
3279
     *
3280
     * @return bool
3281
     *              <strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise
3282
     */
3283 2
    public static function is_bom($str): bool
    {
        // The known BOM byte sequences are the KEYS of self::$BOM; the values
        // are not needed here. The keys are looked up with a strict comparison,
        // which avoids iterating the shared static array by reference (that
        // would turn its elements into references for no benefit).
        return \in_array($str, \array_keys(self::$BOM), true);
    }
3294
3295
    /**
3296
     * Determine whether the string is considered to be empty.
3297
     *
3298
     * A variable is considered empty if it does not exist or if its value equals FALSE.
3299
     * empty() does not generate a warning if the variable does not exist.
3300
     *
3301
     * @param mixed $str
3302
     *
3303
     * @return bool whether or not $str is empty()
3304
     */
3305
    public static function is_empty($str): bool
    {
        // Thin wrapper around PHP's empty() language construct.
        $is_empty = empty($str);

        return $is_empty;
    }
3309
3310
    /**
3311
     * Returns true if the string contains only hexadecimal chars, false otherwise.
3312
     *
3313
     * @param string $str
3314
     *
3315
     * @return bool
3316
     *              Whether or not $str contains only hexadecimal chars
3317
     */
3318 13
    public static function is_hexadecimal(string $str): bool
    {
        // The whole string (possibly empty) must consist of [[:xdigit:]] chars.
        $pattern = '^[[:xdigit:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            // mbstring is not flagged as available: use the class' own matcher.
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3327
3328
    /**
3329
     * Check if the string contains any HTML tags.
3330
     *
3331
     * @param string $str <p>The input string.</p>
3332
     *
3333
     * @return bool
3334
     */
3335 3
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // hack for emoji support :/
        $encoded = self::emoji_encode($str);

        // Look for the first thing shaped like an opening or closing tag,
        // optionally carrying attributes and/or a self-closing slash.
        $found = [];
        \preg_match("/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u", $encoded, $found);

        return $found !== [];
    }
3350
3351
    /**
3352
     * Try to check if "$str" is a JSON-string.
3353
     *
3354
     * @param string $str                                    <p>The input string.</p>
3355
     * @param bool   $only_array_or_object_results_are_valid [optional] <p>Only array and objects are valid json results.</p>
3356
     *
3357
     * @return bool
3358
     */
3359 42
    public static function is_json(
        string $str,
        $only_array_or_object_results_are_valid = true
    ): bool {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // A null result is only accepted when the input itself spells "null"
        // (compared case-insensitively).
        if ($decoded === null && \strtoupper($str) !== 'NULL') {
            return false;
        }

        // Optionally reject decoded results that are neither object nor array.
        $is_container = \is_object($decoded) || \is_array($decoded);
        if ($only_array_or_object_results_are_valid === true && !$is_container) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3389
3390
    /**
3391
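     * Returns true if the string contains only lower case chars, false otherwise.
     *
     * @param string $str <p>The input string.</p>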
3392
     *
3393
     * @return bool
3394
     */
3395 8
    public static function is_lowercase(string $str): bool
    {
        // The whole string (possibly empty) must consist of [[:lower:]] chars.
        $pattern = '^[[:lower:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            // mbstring is not flagged as available: use the class' own matcher.
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3404
3405
    /**
3406
     * Returns true if the string is serialized, false otherwise.
3407
     *
3408
     * @param string $str
3409
     *
3410
     * @return bool whether or not $str is serialized
3411
     */
3412 7
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // "b:0;" is a serialized boolean FALSE: unserialize() returns false for
        // it, which cannot be told apart from the failure value checked below.
        if ($str === 'b:0;') {
            return true;
        }

        // SECURITY: this method only answers "is this a serialized payload?", so
        // no class may be instantiated while probing. With "allowed_classes"
        // set to false, serialized objects are restored as __PHP_Incomplete_Class
        // instead of real instances, so the result is still "!== false" for
        // valid payloads while (possibly untrusted) input cannot create
        // arbitrary objects.
        //
        // The silence operator stays: unserialize() raises a notice/warning for
        // input that is not serialized, and that is an expected outcome here.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3424
3425
    /**
3426
     * Returns true if the string contains only upper case chars, false
3427
     * otherwise.
3428
     *
3429
     * @param string $str <p>The input string.</p>
3430
     *
3431
     * @return bool
3432
     *              <p>Whether or not $str contains only upper case characters.</p>
3433
     */
3434 8
    public static function is_uppercase(string $str): bool
    {
        // The whole string (possibly empty) must consist of [[:upper:]] chars.
        $pattern = '^[[:upper:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            // mbstring is not flagged as available: use the class' own matcher.
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3443
3444
    /**
3445
     * Check if the string is UTF-16.
3446
     *
3447
     * @param mixed $str                       <p>The input string.</p>
3448
     * @param bool  $check_if_string_is_binary
3449
     *
3450
     * @return false|int
3451
     *                   <strong>false</strong> if it's not UTF-16,<br>
3452
     *                   <strong>1</strong> for UTF-16LE,<br>
3453
     *                   <strong>2</strong> for UTF-16BE
3454
     */
3455 22
    public static function is_utf16($str, $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        // Optional pre-check: input that is_binary() (strict mode) does not
        // report as binary is never reported as UTF-16.
        if (
            $check_if_string_is_binary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // Score the little-endian interpretation: convert to UTF-8, round-trip
        // it once, and if the round trip is stable count how many chars of the
        // converted text are found in $str_chars.
        $maybe_utf16le = 0;
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-16LE');
        if ($test) {
            $test2 = \mb_convert_encoding($test, 'UTF-16LE', 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-16LE');
            if ($test3 === $test) {
                if (\count($str_chars) === 0) {
                    $str_chars = self::count_chars($str, true, false);
                }
                // Only the keys of count_chars() are needed. Iterating by value
                // over the keys replaces the former by-reference foreach, which
                // had no reference target because the array is a call result.
                foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                    if (\in_array($test3char, $str_chars, true) === true) {
                        ++$maybe_utf16le;
                    }
                }
            }
        }

        // Same scoring for the big-endian interpretation.
        $maybe_utf16be = 0;
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-16BE');
        if ($test) {
            $test2 = \mb_convert_encoding($test, 'UTF-16BE', 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-16BE');
            if ($test3 === $test) {
                if (\count($str_chars) === 0) {
                    $str_chars = self::count_chars($str, true, false);
                }
                foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                    if (\in_array($test3char, $str_chars, true) === true) {
                        ++$maybe_utf16be;
                    }
                }
            }
        }

        // A tie (including 0:0) means "not UTF-16"; otherwise the higher score
        // wins: 1 = UTF-16LE, 2 = UTF-16BE.
        if ($maybe_utf16be !== $maybe_utf16le) {
            if ($maybe_utf16le > $maybe_utf16be) {
                return 1;
            }

            return 2;
        }

        return false;
    }
3521
3522
    /**
3523
     * Check if the string is UTF-32.
3524
     *
3525
     * @param mixed $str                       <p>The input string.</p>
3526
     * @param bool  $check_if_string_is_binary
3527
     *
3528
     * @return false|int
3529
     *                   <strong>false</strong> if it's not UTF-32,<br>
3530
     *                   <strong>1</strong> for UTF-32LE,<br>
3531
     *                   <strong>2</strong> for UTF-32BE
3532
     */
3533 20
    public static function is_utf32($str, $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        // Optional pre-check: input that is_binary() (strict mode) does not
        // report as binary is never reported as UTF-32.
        if (
            $check_if_string_is_binary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // Score the little-endian interpretation: convert to UTF-8, round-trip
        // it once, and if the round trip is stable count how many chars of the
        // converted text are found in $str_chars.
        $maybe_utf32le = 0;
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-32LE');
        if ($test) {
            $test2 = \mb_convert_encoding($test, 'UTF-32LE', 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-32LE');
            if ($test3 === $test) {
                if (\count($str_chars) === 0) {
                    $str_chars = self::count_chars($str, true, false);
                }
                // Only the keys of count_chars() are needed. Iterating by value
                // over the keys replaces the former by-reference foreach, which
                // had no reference target because the array is a call result.
                foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                    if (\in_array($test3char, $str_chars, true) === true) {
                        ++$maybe_utf32le;
                    }
                }
            }
        }

        // Same scoring for the big-endian interpretation.
        $maybe_utf32be = 0;
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-32BE');
        if ($test) {
            $test2 = \mb_convert_encoding($test, 'UTF-32BE', 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-32BE');
            if ($test3 === $test) {
                if (\count($str_chars) === 0) {
                    $str_chars = self::count_chars($str, true, false);
                }
                foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                    if (\in_array($test3char, $str_chars, true) === true) {
                        ++$maybe_utf32be;
                    }
                }
            }
        }

        // A tie (including 0:0) means "not UTF-32"; otherwise the higher score
        // wins: 1 = UTF-32LE, 2 = UTF-32BE.
        if ($maybe_utf32be !== $maybe_utf32le) {
            if ($maybe_utf32le > $maybe_utf32be) {
                return 1;
            }

            return 2;
        }

        return false;
    }
3599
3600
    /**
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
     *
     * Arrays are checked element by element (recursively): the result is <strong>false</strong>
     * as soon as one element fails the check, an empty array yields <strong>true</strong>.
     *
     * @param int|string|string[]|null $str    <p>The input to be checked.</p>
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str) === true) {
            // iterate by value: nothing is modified here, so a by-reference loop would only
            // force a separation of the array and leave a dangling reference behind
            foreach ($str as $v) {
                if (self::is_utf8($v, $strict) === false) {
                    return false;
                }
            }

            return true;
        }

        // everything that is not an array is checked in its string representation
        return self::is_utf8_string((string) $str, $strict);
    }
3622
3623
    /**
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
     * Decodes a JSON string, the input is passed through "UTF8::filter()" first.
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>
     *                        The <i>json</i> string being decoded.
     *                        </p>
     *                        <p>
     *                        This function only works with UTF-8 encoded strings.
     *                        </p>
     *                        <p>PHP implements a superset of
     *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                        only supports these values when they are nested inside an array or an object.
     *                        </p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>
     *                        User specified recursion depth.
     *                        </p>
     * @param int    $options [optional] <p>
     *                        Bitmask of JSON decode options, it is handed over to "json_decode()" unchanged.
     *                        </p>
     *
     * @throws \RuntimeException if the JSON support flag of this class is <strong>false</strong>
     *
     * @return mixed
     *               The value encoded in <i>json</i> in appropriate PHP type. Values true, false and
     *               null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b> and <b>NULL</b> respectively.
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
     *               is deeper than the recursion limit.
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        $filtered_json = self::filter($json);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_decode($filtered_json, $assoc, $depth, $options);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
3673
3674
    /**
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
     * Returns the JSON representation of a value, the value is passed through "UTF8::filter()" first.
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>
     *                       The <i>value</i> being encoded. Can be any type except
     *                       a resource.
     *                       </p>
     *                       <p>
     *                       All string data must be UTF-8 encoded.
     *                       </p>
     *                       <p>PHP implements a superset of
     *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                       only supports these values when they are nested inside an array or an object.
     *                       </p>
     * @param int   $options [optional] <p>
     *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
     *                       <b>JSON_HEX_TAG</b>,
     *                       <b>JSON_HEX_AMP</b>,
     *                       <b>JSON_HEX_APOS</b>,
     *                       <b>JSON_NUMERIC_CHECK</b>,
     *                       <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_SLASHES</b>,
     *                       <b>JSON_FORCE_OBJECT</b>,
     *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
     *                       constants is described on
     *                       the JSON constants page.
     *                       </p>
     * @param int   $depth   [optional] <p>
     *                       Set the maximum depth. Must be greater than zero.
     *                       </p>
     *
     * @throws \RuntimeException if the JSON support flag of this class is <strong>false</strong>
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filtered_value = self::filter($value);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_encode($filtered_value, $options, $depth);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
3723
3724
    /**
     * Tells whether the JSON functions can be used on this server.
     *
     * The check is based on the existence of the "json_decode()" function.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function json_loaded(): bool
    {
        $decoder_exists = \function_exists('json_decode');

        return $decoder_exists;
    }
3734
3735
    /**
     * Lowercases the first character of the string and leaves the rest untouched.
     *
     * @param string      $str                           <p>The input string</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        // every encoding except the literal "UTF-8" goes through the encoding-aware helpers
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $tail = (string) self::substr($str, 1, null, $encoding);
            $head = self::strtolower(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );

            return $head . $tail;
        }

        $tail = (string) \mb_substr($str, 1);
        $first_char = (string) \mb_substr($str, 0, 1);

        // the plain "mb_" function is used when neither a language nor the length constraint is requested
        if ($lang === null && $try_to_keep_the_string_length === false) {
            $head = \mb_strtolower($first_char);
        } else {
            $head = self::strtolower(
                $first_char,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );
        }

        return $head . $tail;
    }
3791
3792
    /**
     * Alias for "UTF8::lcfirst()": all arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @return string
     *
     * @see UTF8::lcfirst()
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $lowercased = self::lcfirst(
            $str,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );

        return $lowercased;
    }
3814
3815
    /**
     * Lowercases the first character of every word in the string.
     *
     * The string is split via "UTF8::str_to_words()", every non-empty chunk that is not listed
     * in $exceptions is passed to "UTF8::lcfirst()" and the chunks are glued together again.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string[]    $exceptions                    [optional] <p>Exclusion for some words.</p>
     * @param string      $char_list                     [optional] <p>Additional chars that belong to words and do not start
     *                                                   a new word.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // compare with the empty string explicitly: "0" is falsy but still a valid input
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = \count($exceptions) > 0;

        foreach ($words as &$word) {
            // falsy chunks ("" and "0") have nothing that could be lowercased
            if (!$word) {
                continue;
            }

            if (
                $use_exceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $word = self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
            }
        }
        // break the reference, so that a later write to $word cannot change the last array element
        unset($word);

        return \implode('', $words);
    }
3861
3862
    /**
     * Alias for "UTF8::lcfirst()": all arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @return string
     *
     * @see UTF8::lcfirst()
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $lowercased = self::lcfirst(
            $str,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );

        return $lowercased;
    }
3884
3885
    /**
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped, <strong>null</strong> or an empty string
     *                           means: strip whitespace</p>
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // "0" is a valid char-list, so check for null / empty string explicitly instead of truthiness
        if ($chars !== null && $chars !== '') {
            $chars = \preg_quote($chars, '/');
            // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2
            $pattern = "^[{$chars}]+";
        } else {
            $pattern = '^[\\s]+';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
3913
3914
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * An array input is concatenated into one string before the code points are collected.
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @return string|null the character with the highest code point, null if no code point was found
     */
    public static function max($arg)
    {
        $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input, false);
        if (\count($codepoints) < 1) {
            return null;
        }

        return self::chr(\max($codepoints));
    }
3936
3937
    /**
     * Determines the largest byte length among the characters of the given string,
     * based on the list returned by "UTF8::chr_size_list()".
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @return int max byte length of the given chars, 0 if the size list is empty
     */
    public static function max_chr_width(string $str): int
    {
        $byte_lengths = self::chr_size_list($str);

        return \count($byte_lengths) > 0 ? (int) \max($byte_lengths) : 0;
    }
3954
3955
    /**
     * Tells whether the "mbstring" extension is loaded on this server.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function mbstring_loaded(): bool
    {
        $extension_is_loaded = \extension_loaded('mbstring');

        return $extension_is_loaded;
    }
3965
3966
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * An array input is concatenated into one string before the code points are collected.
     *
     * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @return string|null the character with the lowest code point, null if no code point was found
     */
    public static function min($arg)
    {
        $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input, false);
        if (\count($codepoints) < 1) {
            return null;
        }

        return self::chr(\min($codepoints));
    }
3988
3989
    /**
     * Alias for "UTF8::normalize_encoding()": both arguments are forwarded unchanged.
     *
     * @param mixed $encoding
     * @param mixed $fallback
     *
     * @return mixed
     *
     * @see UTF8::normalize_encoding()
     * @deprecated <p>please use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4004
4005
    /**
     * Normalize the encoding-"name" input.
     *
     * The most common names are resolved directly. Every other name is looked up in a static cache,
     * then in the list of known encodings and finally (upper-cased and reduced to letters and digits)
     * in a table of aliases. A name without a matching alias is returned upper-cased.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        // init
        $encoding = (string) $encoding;

        if (!$encoding) {
            return $fallback;
        }

        //
        // shortcuts for the most common names (strict comparison on purpose)
        //

        if (\in_array($encoding, ['UTF-8', 'UTF8'], true)) {
            return 'UTF-8';
        }

        if (\in_array($encoding, ['8BIT', 'BINARY'], true)) {
            return 'CP850';
        }

        if (\in_array($encoding, ['HTML', 'HTML-ENTITIES'], true)) {
            return 'HTML-ENTITIES';
        }

        // only a fallback, for non "strict_types" usage ...
        if (\in_array($encoding, ['1', '0'], true)) {
            return $fallback;
        }

        //
        // cached result from a previous call
        //

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        //
        // exact match in the list of known encodings (the list is loaded on first use)
        //

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        //
        // alias lookup: the key is the upper-cased name without any non-alphanumeric char
        //

        $requested_name = $encoding;
        $encoding = \strtoupper($encoding);
        $alias_key = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $encoding);

        $aliases = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // without a matching alias the upper-cased name is kept
        if (isset($aliases[$alias_key])) {
            $encoding = $aliases[$alias_key];
        }

        // the result is cached under the name as it was requested
        $STATIC_NORMALIZE_ENCODING_CACHE[$requested_name] = $encoding;

        return $encoding;
    }
4148
4149
    /**
     * Converts Windows ("\r\n") and old Mac ("\r") line endings into the unix-like "\n".
     *
     * @param string $str
     *
     * @return string
     */
    public static function normalize_line_ending(string $str): string
    {
        // the order matters: "\r\n" must be replaced before a single "\r"
        $foreign_line_endings = ["\r\n", "\r"];

        return \str_replace($foreign_line_endings, "\n", $str);
    }
4160
4161
    /**
     * Normalize some MS Word special characters, the work is delegated to "ASCII::normalize_msword()".
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @return string
     */
    public static function normalize_msword(string $str): string
    {
        $normalized = ASCII::normalize_msword($str);

        return $normalized;
    }
4172
4173
    /**
     * Normalize the whitespace, the work is delegated to "ASCII::normalize_whitespace()".
     *
     * @param string $str                        <p>The string to be normalized.</p>
     * @param bool   $keep_non_breaking_space    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keep_bidi_unicode_controls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                           bidirectional text chars.</p>
     *
     * @return string
     */
    public static function normalize_whitespace(
        string $str,
        bool $keep_non_breaking_space = false,
        bool $keep_bidi_unicode_controls = false
    ): string {
        $normalized = ASCII::normalize_whitespace($str, $keep_non_breaking_space, $keep_bidi_unicode_controls);

        return $normalized;
    }
4194
4195
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * The code point is taken from (in this order): the static result cache, the "ord" data table,
     * "IntlChar::ord()" (if the support flag is set) and finally a manual decoding of the first bytes.
     *
     * @param string $chr      <p>The character of which to calculate code point.<p/>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             Unicode code point of the given character,<br>
     *             0 on invalid UTF-8 byte sequence
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        static $CHAR_CACHE = [];

        // init
        $chr = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // the key is built from the input as it was given, before any re-encoding
        $cache_key = $chr . $encoding;
        if (isset($CHAR_CACHE[$cache_key]) === true) {
            return $CHAR_CACHE[$cache_key];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        //
        // lookup in the "ord" data table (loaded on first use)
        //

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            $CHAR_CACHE[$cache_key] = self::$ORD[$chr];

            return $CHAR_CACHE[$cache_key];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_code = \IntlChar::ord($chr);
            if ($intl_code) {
                $CHAR_CACHE[$cache_key] = $intl_code;

                return $CHAR_CACHE[$cache_key];
            }
        }

        //
        // fallback via vanilla php: decode at most the first four bytes by hand
        //

        // "unpack()" returns a 1-based list of the byte values
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        /** @noinspection OffsetOperationsInspection */
        $lead_byte = $bytes ? $bytes[1] : 0;

        /** @noinspection OffsetOperationsInspection */
        if ($lead_byte >= 0xF0 && isset($bytes[4])) {
            // lead byte of a 4-byte sequence
            $code = (int) ((($lead_byte - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        } elseif ($lead_byte >= 0xE0 && isset($bytes[3])) {
            // lead byte of a 3-byte sequence
            $code = (int) ((($lead_byte - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        } elseif ($lead_byte >= 0xC0 && isset($bytes[2])) {
            // lead byte of a 2-byte sequence
            $code = (int) ((($lead_byte - 0xC0) << 6) + $bytes[2] - 0x80);
        } else {
            // single byte (or not enough bytes for the lead byte): use the byte value itself
            $code = $lead_byte;
        }

        $CHAR_CACHE[$cache_key] = $code;

        return $code;
    }
4280
4281
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
     *          if the second parameter is not set!
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $result     <p>The result will be returned into this reference parameter.</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return bool
     *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result
     */
    public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
    {
        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        // without mbstring the native parser is used, success is then judged by the result only
        if (self::$SUPPORT['mbstring'] !== true) {
            /** @noinspection PhpVoidFunctionResultUsedInspection */
            \parse_str($str, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($str, $result);

        return $parsed !== false && $result !== [];
    }
4313
4314
    /**
     * Checks if \u modifier is available that enables Unicode support in PCRE.
     *
     * The check runs an empty pattern with the "u" modifier against an empty string.
     *
     * @return bool
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     */
    public static function pcre_utf8_support(): bool
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $match_result = @\preg_match('//u', '');

        return (bool) $match_result;
    }
4326
4327
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * The boundaries are interpreted as decimal code points, as hexadecimal code points or as
     * characters (whose code point is taken via "UTF8::ord()"). A falsy boundary, or a boundary
     * that resolves to the code point 0, results in an empty array.
     *
     * @param mixed     $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param mixed     $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     * @param bool      $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple "is_numeric"</p>
     * @param string    $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param float|int $step      [optional] <p>
     *                             If a step value is given, it will be used as the
     *                             increment between elements in the sequence. step
     *                             should be given as a positive number. If not specified,
     *                             step will default to 1.
     *                             </p>
     *
     * @throws \InvalidArgumentException if $step is not numeric or not greater than zero
     * @throws \RuntimeException         if $use_ctype is set, but the ctype support flag is <strong>false</strong>
     *
     * @return string[]
     */
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        $is_digit = false;
        $is_xdigit = false;

        // The ctype functions are always fed with strings: an integer argument would be
        // interpreted as an ASCII code (and is deprecated since PHP 8.1), so that e.g.
        // the boundaries 10 and 'ff' would not be detected as hexadecimal values.
        $var1_string = (string) $var1;
        $var2_string = (string) $var2;

        /** @noinspection PhpComposerExtensionStubsInspection */
        if ($use_ctype && \ctype_digit($var1_string) && \ctype_digit($var2_string)) {
            $is_digit = true;
            $start = (int) $var1;
        } /** @noinspection PhpComposerExtensionStubsInspection */ elseif ($use_ctype && \ctype_xdigit($var1_string) && \ctype_xdigit($var2_string)) {
            $is_xdigit = true;
            $start = (int) self::hex_to_int($var1_string);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            $start = self::ord($var1);
        }

        if (!$start) {
            return [];
        }

        // the end is interpreted in the same way as the start
        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int($var2_string);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord($var2);
        }

        if (!$end) {
            return [];
        }

        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
4409
4410
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * Strings without any of the markers "&", "%", "+" or "\u" only go through
     * "UTF8::fix_simple_utf8()". Everything else is decoded in a loop
     * (to UTF-8 -> HTML entities -> raw url decoding -> simple UTF-8 fix)
     * until the string no longer changes, or only once if $multi_decode is false.
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @return string
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // fast path: nothing in the string looks like an encoded sequence
        $has_encoded_marker = false;
        foreach (['&', '%', '+', '\u'] as $marker) {
            if (\strpos($str, $marker) !== false) {
                $has_encoded_marker = true;

                break;
            }
        }

        if ($has_encoded_marker === false) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        do {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $without_entities = self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            );

            $str = self::fix_simple_utf8(\rawurldecode($without_entities));
        } while ($multi_decode === true && $previous !== $str);

        return $str;
    }
4467
4468
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is always compiled with the "u" (UTF-8) modifier, followed by $options.
     * A failed "preg_replace()" call (null) is returned as an empty string.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used. "msr" is treated as "ms".</p>
     * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // the option string "msr" is reduced to "ms"
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback: a falsy delimiter (e.g. '') is replaced by the default one
        $boundary = $delimiter ?: '/';

        $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
4501
4502
    /**
     * Legacy camelCase alias that forwards to "UTF8::remove_bom()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the result of "UTF8::remove_bom()"
     *
     * @see UTF8::remove_bom()
     * @deprecated <p>please use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $without_bom = self::remove_bom($str);

        return $without_bom;
    }
4516
4517
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of self::$BOM (BOM string => byte length) is compared against the
     * start of the string; each one that matches is cut off before the next entry
     * is checked.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string without UTF-BOM
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remaining_bytes = \strlen($str);

        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            // only a BOM at the very beginning of the string counts
            if (\strpos($str, $bom_string, 0) !== 0) {
                continue;
            }

            $stripped = \substr($str, $bom_byte_length, $remaining_bytes);
            if ($stripped === false) {
                return '';
            }

            $remaining_bytes -= (int) $bom_byte_length;
            $str = (string) $stripped;
        }

        return $str;
    }
4546
4547
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Every run of directly repeated $what (e.g. "   ") is collapsed into a single
     * occurrence (" "). If $what is neither a string nor an array the input is
     * returned unchanged.
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @return string the result string with removed duplicates
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what) === true) {
            $what = [$what];
        }

        if (\is_array($what) === true) {
            /** @noinspection ForeachSourceInspection */
            foreach ($what as $item) {
                // The replacement is the captured group and not $item itself: "preg_replace()"
                // interprets "$n" / "\n" inside the replacement as back-references, so an item
                // like '$0' would otherwise be replaced by the whole (still duplicated) match.
                $str = (string) \preg_replace(
                    '/(' . \preg_quote($item, '/') . ')+/u',
                    '$1',
                    $str
                );
            }
        }

        return $str;
    }
4570
4571
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $allowable_tags [optional] <p>You can use the optional second parameter to specify tags which should
     *                               not be stripped. Default: '' (strip every tag)
     *                               </p>
     *
     * @return string
     */
    public static function remove_html(string $str, string $allowable_tags = ''): string
    {
        $plain = \strip_tags($str, $allowable_tags);

        return $plain;
    }
4585
4586
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * A "\r\n" pair counts as one break, so it is replaced by a single $replacement.
     * "<br>" tags are matched case-insensitively, with or without attributes and with
     * or without a closing slash; other tags that merely start with "br" are kept.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @return string
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // - no stray "/" in front of "\r\n": it made CRLF fall through to the single "\r" and "\n"
        //   alternatives (two replacements) and swallowed a literal slash in front of a CRLF
        // - "\b" + "[^>]*" limits the tag match to real <br ...> tags
        return (string) \preg_replace("#\r\n|\r|\n|<br\\b[^>]*>#i", $replacement, $str);
    }
4598
4599
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * copy&past from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * The work is done by "ASCII::remove_invisible_characters()", all arguments are
     * forwarded unchanged.
     *
     * @param string $str         <p>The input string.</p>
     * @param bool   $url_encoded <p>Forwarded to the ASCII helper. Default: true</p>
     * @param string $replacement <p>Forwarded to the ASCII helper. Default: ''</p>
     *
     * @return string
     */
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = true,
        string $replacement = ''
    ): string {
        $cleaned = ASCII::remove_invisible_characters($str, $url_encoded, $replacement);

        return $cleaned;
    }
4623
4624
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * The prefix is detected with a byte-wise comparison; an empty $substring or a
     * string that does not start with $substring is returned unchanged.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string without the prefix $substring
     */
    public static function remove_left(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // compare with '' instead of relying on truthiness: the prefix '0' is falsy in PHP
        if ($substring === '' || \strpos($str, $substring) !== 0) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
4658
4659
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * The suffix is detected with a byte-wise comparison; an empty $substring or a
     * string that does not end with $substring is returned unchanged.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string having a $str without the suffix $substring
     */
    public static function remove_right(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // compare with '' instead of relying on truthiness: the suffix '0' is falsy in PHP
        if ($substring === '' || \substr($str, -\strlen($substring)) !== $substring) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
4694
4695
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * Case-sensitive replacements use the native "str_replace()", case-insensitive
     * ones are handed to "UTF8::str_ireplace()".
     *
     * @param string $str            <p>The input string.</p>
     * @param string $search         <p>The needle to search for.</p>
     * @param string $replacement    <p>The string to replace with.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $case_sensitive = true
    ): string {
        return $case_sensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4717
4718
    /**
     * Replaces all occurrences of the $search elements in $str by $replacement.
     *
     * Case-sensitive replacements use the native "str_replace()", case-insensitive
     * ones are handed to "UTF8::str_ireplace()".
     *
     * @param string       $str            <p>The input string.</p>
     * @param array        $search         <p>The elements to search for.</p>
     * @param array|string $replacement    <p>The string to replace with.</p>
     * @param bool         $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $case_sensitive = true
    ): string {
        return $case_sensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4740
4741
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * With $process_invalid_utf8_chars the string is first re-encoded from UTF-8 to UTF-8
     * via "mb_convert_encoding()", so that invalid byte sequences are either dropped
     * (empty replacement) or turned into U+FFFD. Afterwards every U+FFFD is replaced by
     * $replacement_char. The global "mb_substitute_character()" setting is restored
     * after the conversion.
     *
     * @param string $str                        <p>The input string</p>
     * @param string $replacement_char           <p>The replacement character.</p>
     * @param bool   $process_invalid_utf8_chars <p>Convert invalid UTF-8 chars </p>
     *
     * @return string
     */
    public static function replace_diamond_question_mark(
        string $str,
        string $replacement_char = '',
        bool $process_invalid_utf8_chars = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($process_invalid_utf8_chars === true) {
            // "mb_substitute_character()" only accepts a code point (int) or one of the keywords
            // "none" / "long" / "entity". Passing the raw replacement string (e.g. "?") is
            // rejected, so invalid sequences are mapped to U+FFFD instead and picked up by the
            // final "str_replace()" below.
            $substitute = $replacement_char === '' ? 'none' : 0xFFFD;

            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            $save = \mb_substitute_character();
            \mb_substitute_character($substitute);
            // the polyfill maybe return false, so cast to string
            $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            \mb_substitute_character($save);
        }

        return \str_replace(
            [
                "\xEF\xBF\xBD",
                '�',
            ],
            [
                $replacement_char,
                $replacement_char,
            ],
            $str
        );
    }
4790
4791
    /**
     * Strip whitespace or other characters from the end of a UTF-8 string.
     *
     * Without $chars (null or '') trailing whitespace ("\s") is removed. The replacement
     * runs through "mb_ereg_replace()" if mbstring is available, otherwise through
     * "UTF8::regex_replace()".
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped.</p>
     *
     * @return string the string with unwanted characters stripped from the right
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // explicit comparison instead of a truthiness check: the character list '0' is falsy in PHP
        if ($chars !== null && $chars !== '') {
            $chars = \preg_quote($chars, '/');
            $pattern = "[{$chars}]+$";
        } else {
            $pattern = '[\\s]+$';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
4819
4820
    /**
     * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
     *
     * Echoes every entry of self::$SUPPORT as "key - value" line inside a <pre> block.
     *
     * @psalm-suppress MissingReturnType
     */
    public static function showSupport()
    {
        echo '<pre>';
        // iterate by value: nothing is modified, so there is no need to bind
        // references to the elements of the shared static array
        foreach (self::$SUPPORT as $key => $value) {
            echo $key . ' - ' . \print_r($value, true) . "\n<br>";
        }
        echo '</pre>';
    }
4834
4835
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * An empty string is returned as it is. The entity number is taken from "UTF8::ord()".
     *
     * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the HTML numbered entity
     */
    public static function single_chr_html_encode(
        string $char,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($char === '') {
            return '';
        }

        // ASCII input is passed through untouched, but only on request
        $is_kept_ascii = $keep_ascii_chars === true && ASCII::is_ascii($char) === true;
        if ($is_kept_ascii) {
            return $char;
        }

        $code_point = self::ord($char, $encoding);

        return '&#' . $code_point . ';';
    }
4863
4864
    /**
     * Replaces every run of $tab_length consecutive spaces by one tab character.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length <p>Number of spaces that make up one tab. Default: 4</p>
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tab_length = 4): string
    {
        $indent = \str_repeat(' ', $tab_length);

        return \str_replace($indent, "\t", $str);
    }
4882
4883
    /**
     * Alias that forwards all arguments to "UTF8::str_split()".
     *
     * @param string|string[] $str
     * @param int             $length
     * @param bool            $clean_utf8
     *
     * @return string[] the result of "UTF8::str_split()"
     *
     * @see UTF8::str_split()
     */
    public static function split(
        $str,
        int $length = 1,
        bool $clean_utf8 = false
    ): array {
        $chunks = self::str_split($str, $length, $clean_utf8);

        return $chunks;
    }
4901
4902
    /**
     * Alias that forwards both arguments to "UTF8::str_starts_with()".
     *
     * @param string $haystack
     * @param string $needle
     *
     * @return bool the result of "UTF8::str_starts_with()"
     *
     * @see UTF8::str_starts_with()
     */
    public static function str_begins(string $haystack, string $needle): bool
    {
        $starts_with_needle = self::str_starts_with($haystack, $needle);

        return $starts_with_needle;
    }
4916
4917
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * Steps: optional cleaning -> trim + "UTF8::lcfirst()" -> drop leading dashes / underscores
     * -> upper-case the character after each separator run (the separators are removed)
     * -> upper-case each run of numbers together with the character that follows it.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if ($is_known_encoding === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = self::lcfirst(\trim($str), $encoding, false, $lang, $try_to_keep_the_string_length);
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // the native "mb_" functions are only used when no language specific handling is requested
        $use_mb_functions = $lang === null && $try_to_keep_the_string_length === false;

        // shared upper-casing used by both callbacks below
        $to_upper = static function (string $chunk, bool $clean) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
            if ($use_mb_functions !== true) {
                return self::strtoupper($chunk, $encoding, $clean, $lang, $try_to_keep_the_string_length);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($chunk);
            }

            return \mb_strtoupper($chunk, $encoding);
        };

        // separators (dash, underscore, whitespace) vanish, the character behind them is upper-cased
        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($to_upper): string {
                return isset($match[1]) ? $to_upper($match[1], false) : '';
            },
            $str
        );

        // numbers are kept, the whole match (numbers + following character) is upper-cased
        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($to_upper, $clean_utf8): string {
                return $to_upper($match[0], $clean_utf8);
            },
            $str
        );
    }
5002
5003
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * The whitespace is collapsed first, then "UTF8::str_capitalize_name_helper()" is
     * applied twice: with ' ' and afterwards with '-' as second argument
     * (presumably the word delimiter - see the helper).
     *
     * @param string $str
     *
     * @return string string with $str capitalized
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);
        $after_space_pass = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($after_space_pass, '-');
    }
5021
5022
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $case_sensitive to false.
     *
     * The case-sensitive lookup uses "strpos()", the insensitive one "mb_stripos()".
     *
     * @param string $haystack       <p>The input string.</p>
     * @param string $needle         <p>Substring to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool whether or not $haystack contains $needle
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $case_sensitive = true
    ): bool {
        $position = $case_sensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        return $position !== false;
    }
5044
5045
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * False is returned for an empty haystack, for an empty list of needles and as
     * soon as one needle is an empty string or cannot be found.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool whether or not $haystack contains all $needles
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // only the empty string counts as "no needle", '0' is a valid one
            if ($needle === '') {
                return false;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            // keep looping on a hit: every needle has to be present, not just the first one
            if ($position === false) {
                return false;
            }
        }

        return true;
    }
5080
5081
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * False is returned for an empty haystack and for an empty list of needles;
     * empty needles are skipped.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool
     *              Whether or not $haystack contains at least one of the $needles
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // only the empty string is skipped, '0' is a valid needle
            if ($needle === '') {
                continue;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
5123
5124
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * Shortcut for "UTF8::str_delimit()" with '-' as delimiter.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $dasherized = self::str_delimit($str, '-', $encoding);

        return $dasherized;
    }
5138
5139
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase.
     *
     * The regex work is done by "mb_ereg_replace()" if mbstring is available,
     * otherwise by "preg_replace()" with the "u" modifier.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $has_mbstring = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // step 1: put a dash in front of every uppercase letter that is not at a word boundary
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
        } else {
            $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
        }

        // step 2: lowercase, natively if no language specific handling is requested
        $use_mb_functions = $lang === null && $try_to_keep_the_string_length === false;
        if ($use_mb_functions === true && $encoding === 'UTF-8') {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        // step 3: every run of dashes, underscores and whitespace becomes one delimiter
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }
5190
5191
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * The checks run in this order and the first hit wins:
     * binary (UTF-32 / UTF-16), ASCII, UTF-8, "mb_detect_encoding()" with a
     * fixed candidate list, and finally an "iconv()" round-trip per known encoding.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     */
    public static function str_detect_encoding($str)
    {
        $input = (string) $str;

        // 1.) binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        if (self::is_binary($input, true) === true) {
            $utf32_type = self::is_utf32($input, false);
            if ($utf32_type === 1) {
                return 'UTF-32LE';
            }
            if ($utf32_type === 2) {
                return 'UTF-32BE';
            }

            $utf16_type = self::is_utf16($input, false);
            if ($utf16_type === 1) {
                return 'UTF-16LE';
            }
            if ($utf16_type === 2) {
                return 'UTF-16BE';
            }

            // binary, but neither "UTF-16" nor "UTF-32"
            return false;
        }

        // 2.) plain ASCII
        if (ASCII::is_ascii($input) === true) {
            return 'ASCII';
        }

        // 3.) valid UTF-8
        if (self::is_utf8_string($input) === true) {
            return 'UTF-8';
        }

        // 4.) "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"
        if (self::$SUPPORT['mbstring'] === true) {
            $mb_candidates = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($input, $mb_candidates, true);
            if ($detected) {
                return $detected;
            }
        }

        // 5.) "iconv()": an encoding matches when converting the input to itself leaves it unchanged
        self::$ENCODINGS = self::$ENCODINGS ?? self::getData('encodings');

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $round_trip = (string) @\iconv($candidate, $candidate . '//IGNORE', $input);
            if ($round_trip === $input) {
                return $candidate;
            }
        }

        return false;
    }
5312
5313
    /**
     * Alias for "UTF8::str_ends_with()".
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     *
     * @see UTF8::str_ends_with()
     */
    public static function str_ends(string $haystack, string $needle): bool
    {
        $ends_with_needle = self::str_ends_with($haystack, $needle);

        return $ends_with_needle;
    }
5327
5328
    /**
     * Check if the string ends with the given substring (byte-wise, case-sensitive).
     *
     * An empty needle always matches; an empty haystack only matches an empty needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        $needle_length = \strlen($needle);
        if ($needle_length === 0) {
            return true;
        }

        return $haystack !== ''
               &&
               \substr($haystack, -$needle_length) === $needle;
    }
5348
5349
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with one of the $substrings
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        // an empty list yields no loop iterations, so the result is false
        foreach ($substrings as $candidate) {
            $tail = \substr($str, -\strlen($candidate));
            if ($tail === $candidate) {
                return true;
            }
        }

        return false;
    }
5373
5374
    /**
     * Ensures that the string begins with $substring. If it doesn't, it's
     * prepended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        $already_prefixed = $substring !== '' && \strpos($str, $substring) === 0;

        return $already_prefixed ? $str : $substring . $str;
    }
5395
5396
    /**
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        $already_suffixed = $str !== ''
                            &&
                            $substring !== ''
                            &&
                            \substr($str, -\strlen($substring)) === $substring;

        if ($already_suffixed) {
            return $str;
        }

        return $str . $substring;
    }
5418
5419
    /**
     * Strips '_id', replaces the remaining underscores with spaces, trims the
     * result and passes it through "UTF8::ucfirst()".
     *
     * @param string $str
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        // order matters: '_id' has to be removed before the remaining '_' become spaces
        $without_id = \str_replace('_id', '', $str);
        $with_spaces = \str_replace('_', ' ', $without_id);

        return self::ucfirst(\trim($with_spaces));
    }
5443
5444
    /**
     * Alias for "UTF8::str_istarts_with()".
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     *
     * @see UTF8::str_istarts_with()
     */
    public static function str_ibegins(string $haystack, string $needle): bool
    {
        $starts_with_needle = self::str_istarts_with($haystack, $needle);

        return $starts_with_needle;
    }
5458
5459
    /**
     * Alias for "UTF8::str_iends_with()".
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     *
     * @see UTF8::str_iends_with()
     */
    public static function str_iends(string $haystack, string $needle): bool
    {
        $ends_with_needle = self::str_iends_with($haystack, $needle);

        return $ends_with_needle;
    }
5473
5474
    /**
     * Check if the string ends with the given substring, case-insensitive.
     *
     * The tail of the haystack is cut by the byte length of the needle and
     * then compared via "UTF8::strcasecmp()".
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        $needle_length = \strlen($needle);
        if ($needle_length === 0) {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        $tail = \substr($haystack, -$needle_length);

        return self::strcasecmp($tail, $needle) === 0;
    }
5494
5495
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with one of the $substrings
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // an empty list yields no loop iterations, so the result is false
        foreach ($substrings as $candidate) {
            if (self::str_iends_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
5519
5520
    /**
     * Returns the index of the first occurrence of $needle in the string
     * (case-insensitive), and false if not found. Accepts an optional offset
     * from which to begin the search. Delegates to "UTF8::stripos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::stripos($str, $needle, $offset, $encoding);
    }
5546
5547
    /**
     * Returns the index of the last occurrence of $needle in the string
     * (case-insensitive), and false if not found. Accepts an optional offset
     * from which to begin the search. Delegates to "UTF8::strripos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strripos($str, $needle, $offset, $encoding);
    }
5574
5575
    /**
     * Returns the index of the first occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. Delegates to "UTF8::strpos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strpos($str, $needle, $offset, $encoding);
    }
5601
5602
    /**
     * Returns the index of the last occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. Delegates to "UTF8::strrpos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strrpos($str, $needle, $offset, $encoding);
    }
5629
5630
    /**
     * Inserts $substring into the string at the $index provided.
     *
     * If $index is greater than the character length of $str, the input is
     * returned unchanged.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $char_count = (int) self::strlen($str, $encoding);
            if ($index > $char_count) {
                return $str;
            }

            $head = (string) self::substr($str, 0, $index, $encoding);
            $tail = (string) self::substr($str, $index, $char_count, $encoding);

            return $head . $substring . $tail;
        }

        // UTF-8: use the native "mb_*" functions directly
        $char_count = (int) \mb_strlen($str);
        if ($index > $char_count) {
            return $str;
        }

        $head = (string) \mb_substr($str, 0, $index);
        $tail = (string) \mb_substr($str, $index, $char_count);

        return $head . $substring . $tail;
    }
5669
5670
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * Implemented via "preg_replace()" with quoted search patterns; the
     * replacement is escaped so that "$" and "\" are inserted literally and
     * never interpreted as regex backreferences (e.g. "$0" or "\1").
     *
     * @see http://php.net/manual/en/function.str-ireplace.php
     *
     * @param mixed $search  <p>
     *                       Every replacement with search array is
     *                       performed on the result of previous replacement.
     *                       An empty search string never matches.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement string or an array of replacement strings.
     *                       </p>
     * @param mixed $subject <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] <p>
     *                       The number of matched and replaced needles will
     *                       be returned in count which is passed by
     *                       reference.
     *                       </p>
     *
     * @return mixed a string or an array of replacements
     */
    public static function str_ireplace($search, $replace, $subject, &$count = null)
    {
        $patterns = [];
        foreach ((array) $search as $key => $needle) {
            $needle = (string) $needle;

            // an empty needle is mapped to a pattern that can never match
            $patterns[$key] = $needle === ''
                ? '/^(?<=.)$/'
                : '/' . \preg_quote($needle, '/') . '/ui';
        }

        // "preg_replace()" treats "$n" and "\n" in the replacement as backreferences,
        // so escape both characters to keep plain "str_ireplace()" semantics
        $escape = static function ($value): string {
            return \addcslashes((string) $value, '\\$');
        };
        $replacement = \is_array($replace) ? \array_map($escape, $replace) : $escape($replace);

        return \preg_replace($patterns, $replacement, $subject, -1, $count);
    }
5714
5715
    /**
     * Replaces $search from the beginning of string with $replacement
     * (case-insensitive match via "stripos()").
     *
     * An empty $search appends $replacement to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            return $str . $replacement;
        }

        // nothing can match inside an empty input
        if ($str === '') {
            return '';
        }

        $starts_with_search = \stripos($str, $search) === 0;
        if (!$starts_with_search) {
            return $str;
        }

        return $replacement . \substr($str, \strlen($search));
    }
5746
5747
    /**
     * Replaces $search from the ending of string with $replacement
     * (case-insensitive match via "stripos()").
     *
     * An empty $search appends $replacement to $str. If $search is longer
     * than $str, the input is returned unchanged.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            return $str . $replacement;
        }

        $str_length = \strlen($str);
        $search_length = \strlen($search);

        // A longer needle can never be a suffix. This guard also keeps the "stripos()"
        // offset inside the haystack (an out-of-range offset is a ValueError in PHP 8).
        if ($search_length > $str_length) {
            return $str;
        }

        // searching from this offset leaves exactly one possible match position: the suffix
        if (\stripos($str, $search, $str_length - $search_length) !== false) {
            return \substr($str, 0, -$search_length) . $replacement;
        }

        return $str;
    }
5778
5779
    /**
     * Check if the string starts with the given substring, case-insensitive.
     *
     * An empty needle always matches; an empty haystack only matches an empty needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        return $haystack !== ''
               &&
               self::stripos($haystack, $needle) === 0;
    }
5799
5800
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with one of the $substrings
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_istarts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
5828
5829
    /**
     * Gets the substring after the first occurrence of a separator
     * (the separator is matched case-insensitively).
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // look the separator up with the same encoding that is used to cut the result,
        // otherwise the offset and the substring would be counted in different units
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
5866
5867
    /**
     * Gets the substring after the last occurrence of a separator
     * (the separator is matched case-insensitively).
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // look the separator up with the same encoding that is used to cut the result,
        // otherwise the offset and the substring would be counted in different units
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
5904
5905
    /**
     * Gets the substring before the first occurrence of a separator
     * (the separator is matched case-insensitively).
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // look the separator up with the same encoding that is used to cut the result,
        // otherwise the offset and the substring would be counted in different units
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
5934
5935
    /**
     * Gets the substring before the last occurrence of a separator
     * (the separator is matched case-insensitively).
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // UTF-8 goes straight to the native "mb_*" functions
        $offset = $is_utf8
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return $is_utf8
            ? (string) \mb_substr($str, 0, $offset)
            : (string) self::substr($str, 0, $offset, $encoding);
    }
5969
5970
    /**
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
     *
     * Delegates to "UTF8::stristr()"; a "false" result is turned into an empty string.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $part = self::stristr($str, $needle, $before_needle, $encoding);

        return $part === false ? '' : $part;
    }
6006
6007
    /**
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
     *
     * Delegates to "UTF8::strrichr()"; a "false" result is turned into an empty string.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $part = self::strrichr($str, $needle, $before_needle, $encoding);

        return $part === false ? '' : $part;
    }
6043
6044
    /**
     * Returns the last $n characters of the string.
     *
     * An empty input or a non-positive $n yields an empty string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_last_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        if ($n <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $normalized_encoding);
        }

        return (string) \mb_substr($str, -$n);
    }
6070
6071
    /**
     * Limit the number of characters in a string.
     *
     * If the string is longer than $length, it is cut so that the kept part
     * plus $str_add_on is $length characters long. If $str_add_on alone is
     * already as long as (or longer than) $length, only $str_add_on is returned.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // never go negative: a negative length would cut from the end of the
            // string and could produce a result that is longer than the input
            $keep = \max(0, $length - (int) \mb_strlen($str_add_on));

            return ((string) \mb_substr($str, 0, $keep)) . $str_add_on;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // measure the add-on in the same encoding as the string itself
        $keep = \max(0, $length - (int) self::strlen($str_add_on, $encoding));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
    }
6108
6109
    /**
     * Limit the number of characters in a string, cutting at the last space
     * inside the limit where possible.
     *
     * - strings that already fit are returned unchanged
     * - if the character at the limit is a space, the string is cut right before it
     * - otherwise the string is cut at the last space within the first $length characters
     * - if there is no such space, the first ($length - 1) characters are kept
     *
     * In every cut case $str_add_on is appended.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($length <= 0 || $str === '') {
            return '';
        }

        // UTF-8 goes straight to the native "mb_*" functions
        $is_utf8 = $encoding === 'UTF-8';

        $char_count = $is_utf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);
        if ($char_count <= $length) {
            return $str;
        }

        $char_at_limit = $is_utf8
            ? \mb_substr($str, $length - 1, 1)
            : self::substr($str, $length - 1, 1, $encoding);
        if ($char_at_limit === ' ') {
            $head = $is_utf8
                ? \mb_substr($str, 0, $length - 1)
                : self::substr($str, 0, $length - 1, $encoding);

            return ((string) $head) . $str_add_on;
        }

        $truncated = $is_utf8
            ? \mb_substr($str, 0, $length)
            : self::substr($str, 0, $length, $encoding);
        if ($truncated === false) {
            return '' . $str_add_on;
        }

        // keep everything before the last space, i.e. drop the last (possibly cut) word
        $last_space = \strrpos($truncated, ' ');
        $without_last_word = $last_space === false
            ? ''
            : (string) \substr($truncated, 0, $last_space);

        if ($without_last_word !== '') {
            return $without_last_word . $str_add_on;
        }

        // no usable word boundary: fall back to a hard cut
        $hard_cut = $is_utf8
            ? \mb_substr($truncated, 0, $length - 1)
            : self::substr($truncated, 0, $length - 1, $encoding);

        return ((string) $hard_cut) . $str_add_on;
    }
6175
6176
    /**
     * Returns the longest run of leading characters shared by $str1 and $str2.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the common prefix, or an empty string if the first characters differ
     */
    public static function str_longest_common_prefix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        $prefix = '';

        if ($encoding === 'UTF-8') {
            // Only the overlap of both strings can match.
            $limit = (int) \min(\mb_strlen($str1), \mb_strlen($str2));

            $pos = 0;
            while ($pos < $limit) {
                $left = \mb_substr($str1, $pos, 1);
                if ($left === false || $left !== \mb_substr($str2, $pos, 1)) {
                    break;
                }

                $prefix .= $left;
                ++$pos;
            }

            return $prefix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        $pos = 0;
        while ($pos < $limit) {
            $left = self::substr($str1, $pos, 1, $encoding);
            if ($left === false || $left !== self::substr($str2, $pos, 1, $encoding)) {
                break;
            }

            $prefix .= $left;
            ++$pos;
        }

        return $prefix;
    }
6237
6238
    /**
     * Returns the longest common substring between the $str1 and $str2.
     * In the case of ties, it returns that which occurs first in $str1.
     *
     * Uses the classic dynamic-programming solution
     * (http://en.wikipedia.org/wiki/Longest_common_substring_problem). The characters
     * of both strings are extracted once up front and only two table rows are kept,
     * so the inner loop does no substring calls.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the longest common substring, or an empty string if there is none
     */
    public static function str_longest_common_substring(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str1);
            $other_length = (int) \mb_strlen($str2);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str_length = (int) self::strlen($str1, $encoding);
            $other_length = (int) self::strlen($str2, $encoding);
        }

        // Return if either string is empty
        if ($str_length === 0 || $other_length === 0) {
            return '';
        }

        // Evaluated after normalization on purpose: an alias that normalizes to
        // "UTF-8" uses the mbstring path from here on.
        $use_mb = $encoding === 'UTF-8';

        // Extract every character exactly once.
        $str_chars = [];
        for ($i = 0; $i < $str_length; ++$i) {
            $str_chars[] = $use_mb
                ? \mb_substr($str1, $i, 1)
                : self::substr($str1, $i, 1, $encoding);
        }

        $other_chars = [];
        for ($j = 0; $j < $other_length; ++$j) {
            $other_chars[] = $use_mb
                ? \mb_substr($str2, $j, 1)
                : self::substr($str2, $j, 1, $encoding);
        }

        $len = 0; // length of the best match so far
        $end = 0; // index in $str1 just past the best match

        // $previous_row[$j] = length of the common run ending at ($i - 1, $j).
        $previous_row = \array_fill(0, $other_length + 1, 0);

        for ($i = 1; $i <= $str_length; ++$i) {
            $str_char = $str_chars[$i - 1];
            $current_row = [0];

            for ($j = 1; $j <= $other_length; ++$j) {
                if ($str_char === $other_chars[$j - 1]) {
                    $run = $previous_row[$j - 1] + 1;
                    $current_row[$j] = $run;

                    // Strict ">" keeps the first occurrence on ties.
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $current_row[$j] = 0;
                }
            }

            $previous_row = $current_row;
        }

        if ($use_mb) {
            return (string) \mb_substr($str1, $end - $len, $len);
        }

        return (string) self::substr($str1, $end - $len, $len, $encoding);
    }
6325
6326
    /**
     * Returns the longest run of trailing characters shared by $str1 and $str2.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the common suffix, or an empty string if the last characters differ
     */
    public static function str_longest_common_suffix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        $suffix = '';

        if ($encoding === 'UTF-8') {
            $limit = (int) \min(
                \mb_strlen($str1, $encoding),
                \mb_strlen($str2, $encoding)
            );

            // Walk backwards from the last character of both strings.
            for ($back = 1; $back <= $limit; ++$back) {
                $tail_char = \mb_substr($str1, -$back, 1);
                if ($tail_char === false || $tail_char !== \mb_substr($str2, -$back, 1)) {
                    break;
                }

                $suffix = $tail_char . $suffix;
            }

            return $suffix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        for ($back = 1; $back <= $limit; ++$back) {
            $tail_char = self::substr($str1, -$back, 1, $encoding);
            if ($tail_char === false || $tail_char !== self::substr($str2, -$back, 1, $encoding)) {
                break;
            }

            $suffix = $tail_char . $suffix;
        }

        return $suffix;
    }
6390
6391
    /**
     * Returns true if $str matches the supplied pattern, false otherwise.
     *
     * The pattern is given WITHOUT delimiters and modifiers; it is wrapped in
     * "/.../u" internally. A "/" inside the pattern that is not already escaped
     * is escaped automatically, so patterns such as "a/b" stay valid.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against.</p>
     *
     * @return bool whether or not $str matches the pattern
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        // Consume "\x" pairs untouched so an already escaped "\/" (or "\\") is left
        // alone; only a bare "/" would terminate the regex early and gets escaped.
        $escaped_pattern = \preg_replace_callback(
            '~\\\\.|/~s',
            static function (array $match): string {
                return $match[0] === '/' ? '\\/' : $match[0];
            },
            $pattern
        ) ?? $pattern;

        return (bool) \preg_match('/' . $escaped_pattern . '/u', $str);
    }
6403
6404
    /**
     * Tells whether a character exists at the given offset. A negative offset
     * counts backwards from the last character of the string. Implements part
     * of the ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool whether or not the index exists
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $char_count = (int) self::strlen($str, $encoding);

        // Negative offsets reach back at most $char_count characters (-$char_count is the first one).
        return $offset < 0
            ? $char_count >= \abs($offset)
            : $char_count > $offset;
    }
6426
6427
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string the character at the specified index
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Measure the string in the caller's encoding so the bounds check agrees
        // with str_offset_exists() and with the char_at() lookup below.
        $length = (int) self::strlen($str, $encoding);

        if (
            ($index >= 0 && $length <= $index)
            ||
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6456
6457
    /**
     * Pad a string to a given character length with another string.
     *
     * The input is returned unchanged when $pad_length is 0, when $pad_string is
     * empty, or when the input is already longer than $pad_length.
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of 'left', 'right', 'both'
     *
     * @return string returns the padded string
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        // Anything that is not a real integer must be one of the named pad types.
        if ($pad_type !== (int) $pad_type) {
            $named_types = [
                'left'  => \STR_PAD_LEFT,
                'right' => \STR_PAD_RIGHT,
                'both'  => \STR_PAD_BOTH,
            ];

            if (!\is_string($pad_type) || !isset($named_types[$pad_type])) {
                throw new \InvalidArgumentException(
                    'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }

            $pad_type = $named_types[$pad_type];
        }

        // UTF-8 goes straight to mbstring; other encodings use the class helpers.
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str_length = $is_utf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        if ($pad_length < $str_length) {
            return $str;
        }

        $diff = $pad_length - $str_length;

        // Repeats the pad string $repeats times and keeps its first $chars characters.
        $make_padding = static function (int $repeats, int $chars) use ($pad_string, $encoding, $is_utf8): string {
            $filler = \str_repeat($pad_string, $repeats);

            return $is_utf8
                ? (string) \mb_substr($filler, 0, $chars)
                : (string) self::substr($filler, 0, $chars, $encoding);
        };

        if ($pad_type === \STR_PAD_BOTH) {
            // An odd remainder puts the extra character on the right-hand side.
            $left_length = (int) \floor($diff / 2);
            $right_length = (int) \ceil($diff / 2);

            $pre = $make_padding($left_length, $left_length);
            $post = $make_padding($right_length, $right_length);

            return $pre . $str . $post;
        }

        $ps_length = $is_utf8
            ? (int) \mb_strlen($pad_string)
            : (int) self::strlen($pad_string, $encoding);

        $padding = $make_padding((int) \ceil($diff / $ps_length), $diff);

        // Every integer other than STR_PAD_LEFT / STR_PAD_BOTH pads on the right.
        return $pad_type === \STR_PAD_LEFT
            ? $padding . $str
            : $str . $padding;
    }
6619
6620
    /**
     * Pads both sides of the string until it reaches the given length.
     * Shortcut for str_pad() with a $pad_type of STR_PAD_BOTH.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with padding applied
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_BOTH, $encoding);
    }
6645
6646
    /**
     * Pads the beginning of the string until it reaches the given length.
     * Shortcut for str_pad() with a $pad_type of STR_PAD_LEFT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with left padding
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_LEFT, $encoding);
    }
6671
6672
    /**
     * Pads the end of the string until it reaches the given length.
     * Shortcut for str_pad() with a $pad_type of STR_PAD_RIGHT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with right padding
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_RIGHT, $encoding);
    }
6697
6698
    /**
     * Repeat a string.
     *
     * The input is passed through self::filter() first; the filtered value is
     * what gets repeated.
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of times the input string should be
     *                           repeated. Has to be greater than or equal to 0;
     *                           a multiplier of 0 yields an empty string.
     *                           </p>
     *
     * @return string the repeated string
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
6722
6723
    /**
     * Replace all occurrences of the search string with the replacement string.
     *
     * INFO: This only forwards to PHP's native "str_replace()", which is already UTF-8 safe.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param mixed $search  <p>
     *                       The value being searched for (the needle).
     *                       An array may be used to designate multiple needles.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value for found search values.
     *                       An array may be used to designate multiple replacements.
     *                       </p>
     * @param mixed $subject <p>
     *                       The string or array being searched and replaced on (the haystack).
     *                       </p>
     *                       <p>
     *                       If subject is an array, the search and replace is performed
     *                       on every entry and the return value is an array as well.
     *                       </p>
     * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles
     *
     * @return mixed a string or an array with the replaced values
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        /**
         * @psalm-suppress PossiblyNullArgument
         */
        return \str_replace($search, $replace, $subject, $count);
    }
6768
6769
    /**
     * Replaces $search with $replacement when $str starts with $search.
     *
     * Special cases: an empty $search appends $replacement to $str; a $str that
     * does not start with $search is returned unchanged.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_beginning(
        string $str,
        string $search,
        string $replacement
    ): string {
        if ($str === '' && $replacement === '') {
            return '';
        }

        if ($search === '') {
            // For an empty $str this is just $replacement.
            return $str . $replacement;
        }

        // Byte-wise prefix comparison; the replaced part is cut off by byte length.
        $search_bytes = \strlen($search);
        if (\strncmp($str, $search, $search_bytes) !== 0) {
            return $str;
        }

        return $replacement . \substr($str, $search_bytes);
    }
6803
6804
    /**
     * Replaces $search with $replacement when $str ends with $search.
     *
     * Special cases: an empty $search appends $replacement to $str; a $str that
     * does not end with $search (including a $str shorter than $search) is
     * returned unchanged.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_ending(
        string $str,
        string $search,
        string $replacement
    ): string {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        if ($search === '') {
            return $str . $replacement;
        }

        $search_length = \strlen($search);

        // Guard the length first: a $search longer than $str can never match, and
        // asking strpos() for an offset outside the string throws on PHP 8.
        if (
            $search_length <= \strlen($str)
            &&
            \substr($str, -$search_length) === $search
        ) {
            $str = \substr($str, 0, -$search_length) . $replacement;
        }

        return $str;
    }
6838
6839
    /**
     * Replace only the first occurrence of "$search" in "$subject" with "$replace".
     *
     * The subject is returned unchanged when "$search" is not found.
     *
     * @param string $search  <p>The string to search for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string being searched and replaced on.</p>
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(
        string $search,
        string $replace,
        string $subject
    ): string {
        $first_pos = self::strpos($subject, $search);

        if ($first_pos === false) {
            return $subject;
        }

        // Swap exactly the matched stretch, measured in characters.
        $search_length = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $first_pos, $search_length);
    }
6871
6872
    /**
     * Replace only the last occurrence of "$search" in "$subject" with "$replace".
     *
     * The subject is returned unchanged when "$search" is not found.
     *
     * @param string $search  <p>The string to search for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string being searched and replaced on.</p>
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $last_pos = self::strrpos($subject, $search);

        if ($last_pos === false) {
            return $subject;
        }

        // Swap exactly the matched stretch, measured in characters.
        $search_length = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $last_pos, $search_length);
    }
6903
6904
    /**
     * Returns the characters of the string in random order.
     *
     * PS: relies on shuffle(), whose randomness is too weak for cryptographic purposes
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the shuffled string
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $char_count = $is_utf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        // Shuffle character positions, then rebuild the string in that order.
        $positions = \range(0, $char_count - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($positions);

        $shuffled_str = '';
        foreach ($positions as $position) {
            $char = $is_utf8
                ? \mb_substr($str, $position, 1)
                : self::substr($str, $position, 1, $encoding);

            if ($char !== false) {
                $shuffled_str .= $char;
            }
        }

        return $shuffled_str;
    }
6950
6951
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the rest of the string is
     * extracted. If $end is negative, it is counted from the end of the string.
     *
     * NOTE(review): the slice length is derived as ($end - $start) without
     * normalizing a negative $start first — confirm whether a negative $start
     * combined with an explicit $end is meant to be supported.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $start    <p>Initial index from which to begin extraction.</p>
     * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      <p>The extracted substring; an empty string when a non-negative
     *                      <i>end</i> does not lie behind <i>start</i>. Otherwise whatever the
     *                      underlying substring function returns.</p>
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // A non-negative end at or before the start selects nothing.
        if ($end !== null && $end >= 0 && $end <= $start) {
            return '';
        }

        if ($end !== null && $end >= 0) {
            $length = $end - $start;
        } else {
            // The total length is only needed for an open or negative end.
            $total = $is_utf8
                ? (int) \mb_strlen($str)
                : (int) self::strlen($str, $encoding);

            $length = $end === null ? $total : $total + $end - $start;
        }

        if ($is_utf8) {
            return \mb_substr($str, $start, $length);
        }

        return self::substr($str, $start, $length, $encoding);
    }
7000
7001
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Hyphens and whitespace become "_", every uppercase letter is lower-cased
     * and prefixed with "_", every ASCII digit is surrounded by "_", and runs
     * of "_" are collapsed afterwards.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string in snake_case
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace('-', '_', self::normalize_whitespace($str));

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // numbers and uppercase letters only: a "|" inside a character class is a
            // literal pipe, so it must not be used as an "or" here
            '/([\\p{N}\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];

                // only a single ASCII digit survives the int round-trip unchanged
                if ((string) (int) $match === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        $str = (string) \preg_replace(
            [
                '/\\s+/u',           // convert spaces to "_"
                '/^\\s+|\\s+$/u', // trim leading & trailing spaces
                '/_+/',                 // remove double "_"
            ],
            [
                '_',
                '',
                '_',
            ],
            $str
        );

        // trim leading & trailing "_" + whitespace
        return \trim(\trim($str, '_'));
    }
7065
7066
    /**
     * Sort all characters of a string by their code points.
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @return string string of sorted characters
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $code_points = self::codepoints($str);

        if ($unique) {
            // flipping twice leaves every code point only once
            $code_points = \array_flip(\array_flip($code_points));
        }

        if ($desc) {
            \arsort($code_points);

            return self::string($code_points);
        }

        \asort($code_points);

        return self::string($code_points);
    }
7091
7092
    /**
     * Convert a string to an array of Unicode characters.
     *
     * If an array is given, every element is split on its own and an array of
     * arrays (with the original keys) is returned.
     *
     * @param int|int[]|string|string[] $str                     <p>The string to split into array.</p>
     * @param int                       $length                  [optional] <p>Max character length of each array
     *                                                           element.</p>
     * @param bool                      $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool                      $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                                           "mb_substr"</p>
     *
     * @return array
     *               <p>An array containing chunks of the input; an empty array for an empty input
     *               or a $length lower than 1.</p>
     */
    public static function str_split(
        $str,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        if (\is_array($str) === true) {
            // assign by key instead of iterating by reference: no dangling reference is left behind
            foreach ($str as $k => $v) {
                $str[$k] = self::str_split(
                    $v,
                    $length,
                    $clean_utf8,
                    $try_to_use_mb_functions
                );
            }

            return $str;
        }

        // init
        $str = (string) $str;

        if ($str === '') {
            return [];
        }

        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        if (
            $try_to_use_mb_functions === true
            &&
            self::$SUPPORT['mbstring'] === true
        ) {
            $i_max = \mb_strlen($str);
            if ($i_max <= 127) {
                // short strings: one "mb_substr" call per character
                $ret = [];
                for ($i = 0; $i < $i_max; ++$i) {
                    $ret[] = \mb_substr($str, $i, 1);
                }
            } else {
                // longer strings: let PCRE split the string in one pass
                $return_array = [];
                \preg_match_all('/./us', $str, $return_array);
                $ret = $return_array[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $return_array = [];
            \preg_match_all('/./us', $str, $return_array);
            $ret = $return_array[0] ?? [];
        } else {

            // fallback: decode the UTF-8 byte sequences by hand; bytes that do not
            // form a complete sequence are not added to the result

            $ret = [];
            $len = \strlen($str);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($str[$i] & "\x80") === "\x00") {
                    // 0xxxxxxx: single byte
                    $ret[] = $str[$i];
                } elseif (
                    isset($str[$i + 1])
                    &&
                    ($str[$i] & "\xE0") === "\xC0"
                ) {
                    // 110xxxxx: lead byte of a two byte sequence
                    if (($str[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $str[$i] . $str[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($str[$i + 2])
                    &&
                    ($str[$i] & "\xF0") === "\xE0"
                ) {
                    // 1110xxxx: lead byte of a three byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($str[$i + 3])
                    &&
                    ($str[$i] & "\xF8") === "\xF0"
                ) {
                    // 11110xxx: lead byte of a four byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            // "array_map" hands its values over by value, so the callback must not
            // declare a by-reference parameter (PHP 8 raises a warning otherwise)
            return \array_map(
                static function (array $item): string {
                    return \implode('', $item);
                },
                \array_chunk($ret, $length)
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
7231
7232
    /**
     * Splits the string with the provided pattern, returning an array of
     * strings. An optional integer $limit will truncate the results.
     *
     * NOTE(review): with "mbstring" the pattern is handed to "mb_split" as a
     * regular expression, while the fallback passes it through "preg_quote" and
     * therefore matches it literally. Confirm which behavior is intended before
     * relying on regex meta characters.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @return string[] an array of strings
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $result_tmp = \mb_split($pattern, $str);

            // "mb_split" reports a failure with false, which must not leak out of
            // a method that is declared to return an array
            if ($result_tmp === false) {
                return [];
            }

            if ($limit > 0) {
                // keep the first $limit parts only, the remainder is dropped
                return \array_slice($result_tmp, 0, $limit);
            }

            return $result_tmp;
        }

        if ($limit > 0) {
            // ask for one part more: the last part holds the unsplit remainder
            ++$limit;
        } else {
            $limit = -1;
        }

        $array = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);

        if ($array === false) {
            return [];
        }

        if ($limit > 0 && \count($array) === $limit) {
            // drop the unsplit remainder
            \array_pop($array);
        }

        return $array;
    }
7293
7294
    /**
     * Check if the string starts with the given substring.
     *
     * The comparison is byte-wise and case-sensitive. An empty needle always
     * matches.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // compare the prefix only, instead of searching the needle in the whole haystack
        return \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
7314
7315
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with one of the $substrings
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        // nothing can match an empty string or an empty list
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_starts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
7343
7344
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * An empty string is returned if the input or the separator is empty, or
     * if the separator was not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // use the class' own "substr", like the sibling "*_separator" methods do,
        // instead of handing the caller's encoding name to "mb_substr" directly
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
7383
7384
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * An empty string is returned if the input or the separator is empty, or
     * if the separator was not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        $position = $is_utf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);

        if ($position === false) {
            return '';
        }

        if ($is_utf8) {
            // skip the separator itself
            return (string) \mb_substr($str, $position + (int) \mb_strlen($separator));
        }

        $separator_length = (int) self::strlen($separator, $encoding);

        return (string) self::substr($str, $position + $separator_length, null, $encoding);
    }
7423
7424
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * An empty string is returned if the input or the separator is empty, or
     * if the separator was not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        $position = $is_utf8
            ? \mb_strpos($str, $separator)
            : self::strpos($str, $separator, 0, $encoding);

        if ($position === false) {
            return '';
        }

        // everything up to (but not including) the separator
        if ($is_utf8) {
            return (string) \mb_substr($str, 0, $position);
        }

        return (string) self::substr($str, 0, $position, $encoding);
    }
7467
7468
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * An empty string is returned if the input or the separator is empty, or
     * if the separator was not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        $position = $is_utf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);

        if ($position === false) {
            return '';
        }

        // everything up to (but not including) the last separator
        if ($is_utf8) {
            return (string) \mb_substr($str, 0, $position);
        }

        return (string) self::substr(
            $str,
            0,
            $position,
            self::normalize_encoding($encoding, 'UTF-8')
        );
    }
7510
7511
    /**
     * Gets the part of the string starting at (and including) the first occurrence of the "$needle",
     * or the part before that occurrence via "$before_needle".
     *
     * An empty string is returned if the input or the needle is empty, or if
     * the needle was not found.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $part = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $before_needle)
            : self::strstr($str, $needle, $before_needle, $encoding);

        if ($part === false) {
            // needle not found
            return '';
        }

        return $part;
    }
7555
7556
    /**
     * Gets the part of the string starting at (and including) the last occurrence of the "$needle",
     * or the part before that occurrence via "$before_needle".
     *
     * An empty string is returned if the input or the needle is empty, or if
     * the needle was not found.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $part = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $before_needle)
            : self::strrchr($str, $needle, $before_needle, $encoding);

        if ($part === false) {
            // needle not found
            return '';
        }

        return $part;
    }
7600
7601
    /**
     * Surrounds $str with the given substring.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @return string string with the substring both prepended and appended
     */
    public static function str_surround(string $str, string $substring): string
    {
        return \implode('', [$substring, $str, $substring]);
    }
7613
7614
    /**
     * Returns a string with the first letter of each word capitalized and the
     * rest of the word lower-cased. The input is trimmed first, unless
     * $use_trim_first is disabled. Also accepts an array, $ignore, allowing
     * you to list words not to be changed.
     *
     * A word is a run of characters that contains neither whitespace nor one
     * of the $word_define_chars.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                        [optional] <p>An array of words not to capitalize or null.
     *                                                           Default: null</p>
     * @param string              $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null         $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                           tr</p>
     * @param bool                $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                           ß</p>
     * @param bool                $use_trim_first                [optional] <p>true === trim the input string, first</p>
     * @param string|null         $word_define_chars             [optional] <p>A string of chars that will be used as word
     *                                                           separators, in addition to whitespace.</p>
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false,
        bool $use_trim_first = true,
        string $word_define_chars = null
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($use_trim_first === true) {
            $str = \trim($str);
        }

        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        // the plain "mb_*" functions know nothing about language specific rules
        $use_mb_functions = $lang === null && $try_to_keep_the_string_length === false;

        // compare strictly: a plain truthiness check would throw away the valid separator "0"
        if ($word_define_chars !== null && $word_define_chars !== '') {
            $word_define_chars = \preg_quote($word_define_chars, '/');
        } else {
            $word_define_chars = '';
        }

        $str = (string) \preg_replace_callback(
            '/([^\\s' . $word_define_chars . ']+)/u',
            /**
             * @param string[] $match
             *
             * @return string
             */
            static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
                // ignored words are kept exactly as they are
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                    return $match[0];
                }

                if ($use_mb_functions === true) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                               . \mb_strtolower(\mb_substr($match[0], 1));
                    }

                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
                }

                return self::ucfirst(
                    self::strtolower(
                        $match[0],
                        $encoding,
                        false,
                        $lang,
                        $try_to_keep_the_string_length
                    ),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            },
            $str
        );

        return $str;
    }
7704
7705
    /**
7706
     * Returns a trimmed string in proper title case.
7707
     *
7708
     * Also accepts an array, $ignore, allowing you to list words not to be
7709
     * capitalized.
7710
     *
7711
     * Adapted from John Gruber's script.
7712
     *
7713
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
7714
     *
7715
     * @param string $str
7716
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
7717
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7718
     *
7719
     * @return string the titleized string
7720
     */
7721 35
    public static function str_titleize_for_humans(
7722
        string $str,
7723
        array $ignore = [],
7724
        string $encoding = 'UTF-8'
7725
    ): string {
7726 35
        $small_words = \array_merge(
7727
            [
7728 35
                '(?<!q&)a',
7729
                'an',
7730
                'and',
7731
                'as',
7732
                'at(?!&t)',
7733
                'but',
7734
                'by',
7735
                'en',
7736
                'for',
7737
                'if',
7738
                'in',
7739
                'of',
7740
                'on',
7741
                'or',
7742
                'the',
7743
                'to',
7744
                'v[.]?',
7745
                'via',
7746
                'vs[.]?',
7747
            ],
7748 35
            $ignore
7749
        );
7750
7751 35
        $small_words_rx = \implode('|', $small_words);
7752 35
        $apostrophe_rx = '(?x: [\'’] [[:lower:]]* )?';
7753
7754 35
        $str = \trim($str);
7755
7756 35
        if (self::has_lowercase($str) === false) {
7757 2
            $str = self::strtolower($str, $encoding);
7758
        }
7759
7760
        // the main substitutions
7761 35
        $str = (string) \preg_replace_callback(
7762
            '~\\b (_*) (?:                                                         # 1. Leading underscore and
7763
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
7764 35
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_rx . ' ) #    URL, domain, or email
7765
                        |
7766 35
                        ( (?i: ' . $small_words_rx . ' ) ' . $apostrophe_rx . ' )            # 3. or small word (case-insensitive)
7767
                        |
7768 35
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 4. or word w/o internal caps
7769
                        |
7770 35
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 5. or some other word
7771
                      ) (_*) \\b                                                          # 6. With trailing underscore
7772
                    ~ux',
7773
            /**
7774
             * @param string[] $matches
7775
             *
7776
             * @return string
7777
             */
7778
            static function (array $matches) use ($encoding): string {
7779
                // preserve leading underscore
7780 35
                $str = $matches[1];
7781 35
                if ($matches[2]) {
7782
                    // preserve URLs, domains, emails and file paths
7783 5
                    $str .= $matches[2];
7784 35
                } elseif ($matches[3]) {
7785
                    // lower-case small words
7786 25
                    $str .= self::strtolower($matches[3], $encoding);
7787 35
                } elseif ($matches[4]) {
7788
                    // capitalize word w/o internal caps
7789 34
                    $str .= static::ucfirst($matches[4], $encoding);
7790
                } else {
7791
                    // preserve other kinds of word (iPhone)
7792 7
                    $str .= $matches[5];
7793
                }
7794
                // preserve trailing underscore
7795 35
                $str .= $matches[6];
7796
7797 35
                return $str;
7798 35
            },
7799 35
            $str
7800
        );
7801
7802
        // Exceptions for small words: capitalize at start of title...
7803 35
        $str = (string) \preg_replace_callback(
7804
            '~(  \\A [[:punct:]]*            # start of title...
7805
                      |  [:.;?!][ ]+                # or of subsentence...
7806
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
7807 35
                      ( ' . $small_words_rx . ' ) \\b # ...followed by small word
7808
                     ~uxi',
7809
            /**
7810
             * @param string[] $matches
7811
             *
7812
             * @return string
7813
             */
7814
            static function (array $matches) use ($encoding): string {
7815 11
                return $matches[1] . static::ucfirst($matches[2], $encoding);
7816 35
            },
7817 35
            $str
7818
        );
7819
7820
        // ...and end of title
7821 35
        $str = (string) \preg_replace_callback(
7822 35
            '~\\b ( ' . $small_words_rx . ' ) # small word...
7823
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
7824
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
7825
                     ~uxi',
7826
            /**
7827
             * @param string[] $matches
7828
             *
7829
             * @return string
7830
             */
7831
            static function (array $matches) use ($encoding): string {
7832 3
                return static::ucfirst($matches[1], $encoding);
7833 35
            },
7834 35
            $str
7835
        );
7836
7837
        // Exceptions for small words in hyphenated compound words.
7838
        // e.g. "in-flight" -> In-Flight
7839 35
        $str = (string) \preg_replace_callback(
7840
            '~\\b
7841
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
7842 35
                        ( ' . $small_words_rx . ' )
7843
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
7844
                       ~uxi',
7845
            /**
7846
             * @param string[] $matches
7847
             *
7848
             * @return string
7849
             */
7850
            static function (array $matches) use ($encoding): string {
7851
                return static::ucfirst($matches[1], $encoding);
7852 35
            },
7853 35
            $str
7854
        );
7855
7856
        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
7857 35
        $str = (string) \preg_replace_callback(
7858
            '~\\b
7859
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
7860
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
7861 35
                      ( ' . $small_words_rx . ' ) # ...followed by small word
7862
                      (?!	- )                 # Negative lookahead for another -
7863
                     ~uxi',
7864
            /**
7865
             * @param string[] $matches
7866
             *
7867
             * @return string
7868
             */
7869
            static function (array $matches) use ($encoding): string {
7870
                return $matches[1] . static::ucfirst($matches[2], $encoding);
7871 35
            },
7872 35
            $str
7873
        );
7874
7875 35
        return $str;
7876
    }
7877
7878
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of the input are read as one big-endian number and rendered as
     * a string of "0" and "1". Leading zero bits are stripped, so "0" (0x30)
     * becomes "110000"; an empty input (or one made only of NUL bytes)
     * becomes "0".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      <p>false on error</p>
     */
    public static function str_to_binary(string $str)
    {
        $value = \unpack('H*', $str);
        if ($value === false) {
            return false;
        }

        // Translate each hex digit into its four bits. This replaces
        // "base_convert()", which works on a float internally and therefore
        // silently returns wrong digits for inputs longer than a few bytes.
        static $hex_to_bits = [
            '0' => '0000',
            '1' => '0001',
            '2' => '0010',
            '3' => '0011',
            '4' => '0100',
            '5' => '0101',
            '6' => '0110',
            '7' => '0111',
            '8' => '1000',
            '9' => '1001',
            'a' => '1010',
            'b' => '1011',
            'c' => '1100',
            'd' => '1101',
            'e' => '1110',
            'f' => '1111',
        ];

        // Strip leading zeros so short inputs keep producing exactly what
        // "base_convert()" produced for them.
        $bits = \ltrim(\strtr($value[1], $hex_to_bits), '0');

        return $bits === '' ? '0' : $bits;
    }
7896
7897
    /**
     * Split a string into its lines.
     *
     * The separator is a run of one or two consecutive "\r" / "\n" characters,
     * so "\r\n" counts as one line break (and so does "\n\n"). With mbstring
     * support the split is done by "mb_split()", otherwise by "preg_split()".
     *
     * @param string   $str
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
    {
        // Result for "nothing to split" and for a failed split.
        $fallback = $remove_empty_values === true ? [] : [''];

        if ($str === '') {
            return $fallback;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split("[\r\n]{1,2}", $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        if ($lines === false) {
            return $fallback;
        }

        $needs_filtering = $remove_empty_values !== false || $remove_short_values !== null;
        if (!$needs_filtering) {
            return $lines;
        }

        return self::reduce_string_array($lines, $remove_empty_values, $remove_short_values);
    }
7935
7936
    /**
     * Convert a string into an array of words.
     *
     * The string is split with PREG_SPLIT_DELIM_CAPTURE, so the result
     * alternates between the text around the words (even indexes) and the
     * words themselves (odd indexes). A word is a run of letters (plus the
     * chars from $char_list), optionally joined by a dash or an apostrophe.
     *
     * @param string   $str
     * @param string   $char_list           <p>Additional chars for the definition of "words".</p>
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $char_list = '',
        bool $remove_empty_values = false,
        int $remove_short_values = null
    ): array {
        // Result for an empty input and for a failed split.
        $fallback = $remove_empty_values === true ? [] : [''];

        if ($str === '') {
            return $fallback;
        }

        $char_list = self::rxClass($char_list, '\pL');

        $parts = \preg_split("/({$char_list}+(?:[\p{Pd}’']{$char_list}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $fallback;
        }

        if ($remove_empty_values === false && $remove_short_values === null) {
            return $parts;
        }

        $reduced = self::reduce_string_array($parts, $remove_empty_values, $remove_short_values);

        // Guarantee string values while keeping the keys as delivered.
        $words = [];
        foreach ($reduced as $key => $value) {
            $words[$key] = (string) $value;
        }

        return $words;
    }
7983
7984
    /**
     * Alias for "UTF8::to_ascii()": all arguments are forwarded unchanged.
     *
     * @param string $str
     * @param string $unknown
     * @param bool   $strict
     *
     * @return string
     *
     * @see UTF8::to_ascii()
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
     */
    public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
8003
8004
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If $substring alone is already longer than a non-negative $length,
     * nothing of $str is kept and only $substring is returned (same result as
     * "UTF8::str_truncate_safe()" gives in that situation).
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                $reserved = (int) \mb_strlen($substring);

                // A non-negative budget must not become negative: a negative
                // length makes "mb_substr()" cut from the END of the string,
                // which returned far more than $length characters.
                $length = $length >= 0 ? \max(0, $length - $reserved) : $length - $reserved;
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length) . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            $reserved = (int) self::strlen($substring, $encoding);

            // Same clamp as in the UTF-8 branch above.
            $length = $length >= 0 ? \max(0, $length - $reserved) : $length - $reserved;
        }

        return (string) self::substr($str, 0, $length, $encoding) . $substring;
    }
8061
8062
    /**
     * Truncates the string to a given length without cutting a word in half.
     * If $substring is provided, and truncating occurs, the string is further
     * shortened so that the substring may be appended without exceeding the
     * desired length.
     *
     * When the cut would land inside a word, the result is shortened back to
     * the last space before the cut. If there is no such space, the truncated
     * text is emptied only when a later space exists in $str and
     * $ignore_do_not_split_words_for_one_word is false; otherwise the cut word
     * is kept.
     *
     * @param string $str
     * @param int    $length                                 <p>Desired length of the truncated string.</p>
     * @param string $substring                              [optional] <p>The substring to append if it can fit. Default:
     *                                                       ''</p>
     * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>Default: false</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignore_do_not_split_words_for_one_word = false
    ): string {
        if ($length <= 0 || $str === '') {
            return $substring;
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            if ((int) self::strlen($str, $encoding) <= $length) {
                return $str;
            }

            // reserve room for the substring that gets appended
            $length -= (int) self::strlen($substring, $encoding);
            if ($length <= 0) {
                return $substring;
            }

            $head = self::substr($str, 0, $length, $encoding);
            if ($head === false) {
                return '';
            }

            // a space directly behind the cut means no word was split
            $next_space = self::strpos($str, ' ', $length - 1, $encoding);
            if ($next_space === $length) {
                return $head . $substring;
            }

            // otherwise fall back to the last space inside the kept part
            $last_space = self::strrpos($head, ' ', 0, $encoding);
            $shorten = $last_space !== false
                       ||
                       ($next_space !== false && $ignore_do_not_split_words_for_one_word === false);

            if ($shorten) {
                $head = (string) self::substr($head, 0, (int) $last_space, $encoding);
            }

            return $head . $substring;
        }

        if ((int) \mb_strlen($str) <= $length) {
            return $str;
        }

        // reserve room for the substring that gets appended
        $length -= (int) \mb_strlen($substring);
        if ($length <= 0) {
            return $substring;
        }

        $head = \mb_substr($str, 0, $length);
        if ($head === false) {
            return '';
        }

        // a space directly behind the cut means no word was split
        $next_space = \mb_strpos($str, ' ', $length - 1);
        if ($next_space === $length) {
            return $head . $substring;
        }

        // otherwise fall back to the last space inside the kept part
        $last_space = \mb_strrpos($head, ' ', 0);
        $shorten = $last_space !== false
                   ||
                   ($next_space !== false && $ignore_do_not_split_words_for_one_word === false);

        if ($shorten) {
            $head = (string) \mb_substr($head, 0, (int) $last_space);
        }

        return $head . $substring;
    }
8156
8157
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * This is "UTF8::str_delimit()" called with "_" as the delimiter.
     *
     * @param string $str
     *
     * @return string the underscored string
     */
    public static function str_underscored(string $str): string
    {
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
8171
8172
    /**
     * Returns an UpperCamelCase version of the supplied string. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * The string is camelized via "UTF8::str_camelize()" first; $clean_utf8,
     * $lang and $try_to_keep_the_string_length are only used for the final
     * "UTF8::ucfirst()" step.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string in UpperCamelCase
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst(
            $camelized,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }
8194
8195
    /**
     * Alias for "UTF8::ucfirst()": all arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @return string
     *
     * @see UTF8::ucfirst()
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
     */
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $result = self::ucfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
8224
8225
    /**
     * Get the number of words in a specific string.
     *
     * Works on the output of "UTF8::str_to_words()", where the words sit at
     * the odd indexes and the text between them at the even indexes.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $format    [optional] <p>
     *                          <strong>0</strong> => return a number of words (default)<br>
     *                          <strong>1</strong> => return an array of words<br>
     *                          <strong>2</strong> => return an array of words with word-offset as key
     *                          </p>
     * @param string $char_list [optional] <p>Additional chars that contains to words and do not start a new word.</p>
     *
     * @return int|string[] The number of words in the string
     */
    public static function str_word_count(string $str, int $format = 0, string $char_list = '')
    {
        $parts = self::str_to_words($str, $char_list);
        $total = \count($parts);

        // every format other than 1 and 2 just counts the words
        if ($format !== 1 && $format !== 2) {
            return (int) (($total - 1) / 2);
        }

        $words = [];

        if ($format === 1) {
            for ($idx = 1; $idx < $total; $idx += 2) {
                $words[] = $parts[$idx];
            }

            return $words;
        }

        // format 2: key each word by the offset at which it starts,
        // beginning behind the text that precedes the first word
        $offset = (int) self::strlen($parts[0]);
        for ($idx = 1; $idx < $total; $idx += 2) {
            $words[$offset] = $parts[$idx];
            $offset += (int) self::strlen($parts[$idx]) + (int) self::strlen($parts[$idx + 1]);
        }

        return $words;
    }
8262
8263
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp() - both strings are run
     * through "UTF8::strtocasefold()" and then handed to "UTF8::strcmp()".
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): int {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
8301
8302
    /**
     * Alias for "UTF8::strstr()": all arguments are forwarded unchanged.
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $clean_utf8
     *
     * @return false|string
     *
     * @see UTF8::strstr()
     */
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $result = self::strstr($haystack, $needle, $before_needle, $encoding, $clean_utf8);

        return $result;
    }
8330
8331
    /**
     * Case-sensitive string comparison.
     *
     * Both strings are brought into NFD form before they are compared, so
     * canonically equivalent strings (e.g. a precomposed "é" and "e" followed
     * by a combining acute accent) compare as equal. A string that cannot be
     * normalized (e.g. malformed UTF-8) takes part with its raw bytes.
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // "Normalizer::normalize()" reports failure with a non-string value.
        // Handing that to "\strcmp()" is a TypeError under strict types and
        // otherwise makes every malformed string compare equal to any other.
        return \strcmp(
            \is_string($normalized1) ? $normalized1 : $str1,
            \is_string($normalized2) ? $normalized2 : $str2
        );
    }
8353
8354
    /**
     * Find length of initial segment not matching mask.
     *
     * If $offset and/or $length are given, only that part of $str is looked
     * at and the result is counted from the start of that part.
     *
     * @param string $str
     * @param string $char_list
     * @param int    $offset
     * @param int    $length
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     */
    public static function strcspn(
        string $str,
        string $char_list,
        int $offset = null,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // an empty mask can never match: the whole string is the segment
        if ($char_list === '') {
            return (int) self::strlen($str, $encoding);
        }

        if ($offset !== null || $length !== null) {
            $start = (int) $offset;

            if ($encoding !== 'UTF-8') {
                $slice = self::substr($str, $start, $length, $encoding);
            } elseif ($length === null) {
                $slice = \mb_substr($str, $start);
            } else {
                $slice = \mb_substr($str, $start, $length);
            }

            if ($slice === false) {
                return 0;
            }

            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $str = $slice;
        }

        if ($str === '') {
            return 0;
        }

        // lazily capture everything in front of the first char from the mask
        $found = [];
        if (!\preg_match('/^(.*?)' . self::rxClass($char_list) . '/us', $str, $found)) {
            return (int) self::strlen($str, $encoding);
        }

        $segment_length = self::strlen($found[1], $encoding);

        return $segment_length === false ? 0 : $segment_length;
    }
8418
8419
    /**
     * Alias for "UTF8::stristr()": all arguments are forwarded unchanged.
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $clean_utf8
     *
     * @return false|string
     *
     * @see UTF8::stristr()
     */
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $result = self::stristr($haystack, $needle, $before_needle, $encoding, $clean_utf8);

        return $result;
    }
8447
8448
    /**
     * Create a UTF-8 string from code points.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * Every entry is passed to "UTF8::chr()" and the results are joined in
     * the given order.
     *
     * @param array $array <p>Integer or Hexadecimal codepoints.</p>
     *
     * @return string UTF-8 encoded string
     */
    public static function string(array $array): string
    {
        $to_char = [
            self::class,
            'chr',
        ];

        $chars = \array_map($to_char, $array);

        return \implode('', $chars);
    }
8470
8471
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * The candidate byte sequences are the keys of the static BOM table; the
     * values of that table are not needed here.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     */
    public static function string_has_bom(string $str): bool
    {
        // Iterate over the keys by value: a by-reference loop over the shared
        // static table is not needed for a read-only check and would leave
        // reference slots behind in it.
        foreach (\array_keys(self::$BOM) as $bom_string) {
            $bom_string = (string) $bom_string;

            // compare the prefix only, instead of searching the whole string
            if (\strncmp($str, $bom_string, \strlen($bom_string)) === 0) {
                return true;
            }
        }

        return false;
    }
8491
8492
    /**
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string $str            <p>
     *                               The input string.
     *                               </p>
     * @param string $allowable_tags [optional] <p>
     *                               You can use the optional second parameter to specify tags which should
     *                               not be stripped.
     *                               </p>
     *                               <p>
     *                               HTML comments and PHP tags are also stripped. This is hardcoded and
     *                               can not be changed with allowable_tags.
     *                               </p>
     * @param bool   $clean_utf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     *                <p>The stripped string.</p>
     */
    public static function strip_tags(
        string $str,
        string $allowable_tags = null,
        bool $clean_utf8 = false
    ): string {
        if ($str === '') {
            return '';
        }

        // the optional cleaning happens before any tag is removed
        $input = $clean_utf8 === true ? self::clean($str) : $str;

        return $allowable_tags !== null
            ? \strip_tags($input, $allowable_tags)
            : \strip_tags($input);
    }
8532
8533
    /**
     * Strip all whitespace characters. This includes tabs and newline
     * characters, as well as multibyte whitespace such as the thin space
     * and ideographic space.
     *
     * @param string $str
     *
     * @return string
     */
    public static function strip_whitespace(string $str): string
    {
        return $str === ''
            ? ''
            : (string) \preg_replace('/[[:space:]]+/u', '', $str);
    }
8550
8551
    /**
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
     *
     * Strategy, in this order: "mb_stripos()" if mbstring is available, then
     * "grapheme_stripos()" (UTF-8 and non-negative offset only), then the
     * native "stripos()" for pure ASCII input, and finally a case-folded
     * "UTF8::strpos()".
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle     <p>The string to find in haystack.</p>
     * @param int    $offset     [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stripos($haystack, $needle, $offset);
            }

            return \mb_stripos(
                $haystack,
                $needle,
                $offset,
                self::normalize_encoding($encoding, 'UTF-8')
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // "grapheme_stripos()" can't handle other encodings
        // and can't handle a negative offset
        $grapheme_usable = $encoding === 'UTF-8'
                           &&
                           $offset >= 0
                           &&
                           self::$SUPPORT['intl'] === true;

        if ($grapheme_usable) {
            $grapheme_position = \grapheme_stripos($haystack, $needle, $offset);
            if ($grapheme_position !== false) {
                return $grapheme_position;
            }
        }

        // fallback for ascii only
        if (ASCII::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        // fallback via vanilla php: fold both strings, then search case-sensitively
        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($folded_haystack, $folded_needle, $offset, $encoding);
    }
8626
8627
    /**
     * Returns all of haystack starting from and including the first occurrence of needle to the end
     * (case-insensitive search).
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
     */
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // The needle can only be empty at this point if the cleanup above removed
        // everything. Compare against '' explicitly: a loose "!$needle" check also
        // matches the perfectly valid needle "0" and would return the whole haystack.
        if ($needle === '') {
            return $haystack;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // Capture (non-greedy) everything in front of the first case-insensitive match.
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // Skip the prefix (counted in characters) to get the part starting at the needle.
        return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
    }
8711
8712
    /**
     * Count the characters of a string (this is not the byte-length).
     *
     * @see http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str        <p>The string whose length is wanted.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   <p>
     *                   The number <strong>(int)</strong> of characters in $str for the given $encoding
     *                   (a multi-byte character counts as one).
     *                   <br>
     *                   Can return <strong>false</strong>, e.g. if mbstring is not installed and the
     *                   string contains invalid chars.
     *                   </p>
     */
    public static function strlen(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($str === '') {
            return 0;
        }

        // Only unknown spellings need to be normalized.
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // "mb_strlen" and "\iconv_strlen" return a wrong length
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        $is_utf8 = $encoding === 'UTF-8';

        // 1) mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $is_utf8 ? \mb_strlen($str) : \mb_strlen($str, $encoding);
        }

        // 2) binary / ascii: one byte per character
        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strlen($str);
        }

        if (
            !$is_utf8
            &&
            self::$SUPPORT['mbstring'] === false
            &&
            self::$SUPPORT['iconv'] === false
        ) {
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // 3) iconv
        if (self::$SUPPORT['iconv'] === true) {
            $length = \iconv_strlen($str, $encoding);
            if ($length !== false) {
                return $length;
            }
        }

        // 4) intl ("grapheme_strlen()" can't handle other encodings than UTF-8)
        if ($is_utf8 && self::$SUPPORT['intl'] === true) {
            $length = \grapheme_strlen($str);
            if ($length !== null) {
                return $length;
            }
        }

        // 5) ascii-only input
        if (ASCII::is_ascii($str)) {
            return \strlen($str);
        }

        // 6) vanilla php: count the matches of "any single character"
        \preg_match_all('/./us', $str, $chars);
        $length = \count($chars[0]);

        return $length === 0 ? false : $length;
    }
8831
8832
    /**
     * Get the length of a string in bytes.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return int <p>The number of bytes, 0 for an empty string.</p>
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT encoding ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850')
            : \strlen($str);
    }
8852
8853
    /**
     * Compare two strings case-insensitively with a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcasecmp()
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        // Case-fold both operands first, then delegate to the case-sensitive variant.
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
8874
8875
    /**
     * Compare two strings with a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcmp()
     *
     * @see http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        if ($str1 !== $str2) {
            $left = (string) self::strtonatfold($str1);
            $right = (string) self::strtonatfold($str2);

            return \strnatcmp($left, $right);
        }

        // identical input, nothing to fold or compare
        return 0;
    }
8901
8902
    /**
     * Case-insensitive string comparison of the first n characters.
     *
     * @see http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // Forward $encoding as well: it was used for the case folding only, so the
        // truncation to $len characters inside strncmp() always ran as UTF-8.
        return self::strncmp(
            self::strtocasefold($str1, true, false, $encoding, null, false),
            self::strtocasefold($str2, true, false, $encoding, null, false),
            $len,
            $encoding
        );
    }
8929
8930
    /**
     * Compare the first n characters of two strings.
     *
     * @see http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Cut both strings down to their first $len characters ...
        if ($encoding !== 'UTF-8') {
            $head1 = (string) self::substr($str1, 0, $len, $encoding);
            $head2 = (string) self::substr($str2, 0, $len, $encoding);
        } else {
            $head1 = (string) \mb_substr($str1, 0, $len);
            $head2 = (string) \mb_substr($str2, 0, $len);
        }

        // ... and compare what is left.
        return self::strcmp($head1, $head2);
    }
8965
8966
    /**
     * Search a string for the first occurrence of any character out of a set.
     *
     * @see http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>This parameter is case-sensitive.</p>
     *
     * @return false|string string starting from the character found, or false if it is not found
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $hit)) {
            return false;
        }

        // Locate the matched character (byte position) and return everything from there on.
        $byte_offset = (int) \strpos($haystack, $hit[0]);

        return \substr($haystack, $byte_offset);
    }
8988
8989
    /**
     * Find the position of the first occurrence of a substring in a string.
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>The search offset. If it is not specified, 0 is used.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string.<br> If needle is not found it returns false.
     */
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if (\is_int($needle)) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strpos($haystack, $needle, $offset);
            }

            return \mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //
        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $return_tmp = \iconv_strpos($haystack, $needle, $offset, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // A negative offset counts from the end of the string. Translate it into an
        // absolute character offset first, otherwise the position found inside the
        // slice below would be reported relative to the slice instead of $haystack.
        if ($offset < 0) {
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystack_tmp === false) {
            $haystack_tmp = '';
        }
        $haystack = (string) $haystack_tmp;

        // byte position of the needle inside the slice
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        if ($pos) {
            // convert the byte position into a character position and re-add the skipped characters
            return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        return $offset;
    }
9136
9137
    /**
     * Find the byte position of the first occurrence of a substring in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @return false|int The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found, it returns false.
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT encoding ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strpos($haystack, $needle, $offset, 'CP850')
            : \strpos($haystack, $needle, $offset);
    }
9166
9167
    /**
     * Find the last occurrence of a character in a string within another.
     *
     * INFO: without mbstring, the iconv / vanilla fallbacks search for the first character of $needle only.
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or false if needle is not found
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // 1) mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        // 2) binary / ascii (native strrchr() has no "before needle" mode)
        $is_single_byte = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($before_needle === false && $is_single_byte) {
            return \strrchr($haystack, $needle);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // 3) iconv or vanilla php: both search for the first character of the needle only
        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }
        $needle = (string) $first_char;

        $pos = self::$SUPPORT['iconv'] === true
            ? \iconv_strrpos($haystack, $needle, $encoding)
            : self::strrpos($haystack, $needle, 0, $encoding);
        if ($pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $pos, $encoding)
            : self::substr($haystack, $pos, null, $encoding);
    }
9289
9290
    /**
     * Reverse the order of the characters in a string.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the string with characters in the reverse sequence
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // Emoji are encoded before and decoded again after the reversal.
        $encoded = self::emoji_encode($str, true);

        // collect the characters back to front
        $pieces = [];

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            for ($i = (int) self::strlen($encoded, $encoding) - 1; $i >= 0; --$i) {
                $char = self::substr($encoded, $i, 1, $encoding);
                if ($char !== false) {
                    $pieces[] = $char;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($i = (int) \grapheme_strlen($encoded) - 1; $i >= 0; --$i) {
                $char = \grapheme_substr($encoded, $i, 1);
                if ($char !== false) {
                    $pieces[] = $char;
                }
            }
        } else {
            for ($i = (int) \mb_strlen($encoded) - 1; $i >= 0; --$i) {
                $char = \mb_substr($encoded, $i, 1);
                if ($char !== false) {
                    $pieces[] = $char;
                }
            }
        }

        return self::emoji_decode(\implode('', $pieces), true);
    }
9342
9343
    /**
     * Find the last occurrence of a character in a string within another, case-insensitive.
     *
     * INFO: without mbstring, the fallback searches for the first character of $needle only.
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or<br>false if needle is not found
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // 1) mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        // 2) vanilla php: look for the last position of the needle's first character
        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }
        $needle = (string) $first_char;

        $pos = self::strripos($haystack, $needle, 0, $encoding);
        if ($pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $pos, $encoding)
            : self::substr($haystack, $pos, null, $encoding);
    }
9418
9419
    /**
     * Find the position of the last occurrence of a substring in a string, case-insensitive.
     *
     * @param string     $haystack   <p>The string to look in.</p>
     * @param int|string $needle     <p>The string to look for.<br>A non-negative int is converted via UTF8::chr().</p>
     * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $is_utf8 = $encoding === 'UTF-8';

        // 1) mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $is_utf8
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        // 2) binary / ascii
        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strripos($haystack, $needle, $offset);
        }

        if (
            !$is_utf8
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // 3) intl: "grapheme_strripos()" can't handle other encodings or a negative offset
        if ($is_utf8 && $offset >= 0 && self::$SUPPORT['intl'] === true) {
            $found = \grapheme_strripos($haystack, $needle, $offset);
            if ($found !== false) {
                return $found;
            }
        }

        // 4) ascii-only input
        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        // 5) vanilla php: case-fold both strings and run the case-sensitive search
        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($folded_haystack, $folded_needle, $offset, $encoding, $clean_utf8);
    }
9529
9530
    /**
     * Byte-wise, case-insensitive search for the last occurrence of a string within another.
     *
     * @param string $haystack <p>The string that is searched.</p>
     * @param string $needle   <p>The string to find in haystack.</p>
     * @param int    $offset   [optional] <p>The position in haystack to start searching.</p>
     *
     * @return false|int
     *                   <p>Return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found (or if one of the
     *                   strings is empty).</p>
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        $has_input = $haystack !== '' && $needle !== '';
        if (!$has_input) {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-bit encoding ...
        // otherwise the native function is used
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strripos($haystack, $needle, $offset, 'CP850')
            : \strripos($haystack, $needle, $offset);
    }
9562
9563
    /**
     * Find the position of the last occurrence of a substring in a string.
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack   <p>The string that is searched for the last occurrence of needle.</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                               into the string. Negative values will stop searching at an arbitrary point prior to
     *                               the end of the string.
     *                               </p>
     * @param string     $encoding   [optional] <p>Set the charset.</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found (or haystack / needle is empty), it returns false.</p>
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // nothing can be found inside an empty haystack
        if ($haystack === '') {
            return false;
        }

        // a non-negative integer needle is converted via self::chr(), because
        // iconv and mbstring do not support an integer $needle
        if ($needle === (int) $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }

        $needle = (string) $needle;
        if ($needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // mb_strrpos() and iconv_strrpos() are not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // "UTF-8" and "CP850" are used as-is, everything else is normalized first
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $is_utf8 = $encoding === 'UTF-8';

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $is_utf8
                ? \mb_strrpos($haystack, $needle, $offset)
                : \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strrpos($haystack, $needle, $offset);
        }

        // from here on only UTF-8 can be handled, so warn about anything else
        if (!$is_utf8 && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // grapheme_strrpos() can handle neither other encodings nor a negative offset
        if ($is_utf8 && $offset >= 0 && self::$SUPPORT['intl'] === true) {
            $position_via_intl = \grapheme_strrpos($haystack, $needle, $offset);
            if ($position_via_intl !== false) {
                return $position_via_intl;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // restrict the haystack to the part selected by the offset
        if ($offset !== 0) {
            if ($offset > 0) {
                $search_area = self::substr($haystack, $offset);
            } else {
                $search_area = self::substr($haystack, 0, $offset);
                $offset = 0;
            }

            // a failed substr() (false) ends up as an empty haystack
            $haystack = (string) $search_area;
        }

        // byte position of the last match ...
        $byte_position = \strrpos($haystack, $needle);
        if ($byte_position === false) {
            return false;
        }

        // ... converted into a character position by measuring everything in front of the match
        $before_match = \substr($haystack, 0, $byte_position);
        if ($before_match === false) {
            return false;
        }

        return $offset + (int) self::strlen($before_match);
    }
9700
9701
    /**
     * Byte-wise search for the position of the last occurrence of a substring in a string.
     *
     * @param string $haystack <p>The string that is searched for the last occurrence of needle.</p>
     * @param string $needle   <p>The string to find in haystack.</p>
     * @param int    $offset   [optional] <p>May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *                         </p>
     *
     * @return false|int
     *                   <p>The numeric position of the last occurrence of needle in the
     *                   haystack string. If needle is not found (or one of the strings is empty),
     *                   it returns false.</p>
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        $has_input = $haystack !== '' && $needle !== '';
        if (!$has_input) {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-bit encoding ...
        // otherwise the native function is used
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
            : \strrpos($haystack, $needle, $offset);
    }
9733
9734
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $mask     <p>The mask of chars</p>
     * @param int    $offset   [optional] <p>If given (together with or without length), only that part of the
     *                         input string is examined.</p>
     * @param int    $length   [optional] <p>Length of the examined part, <b>null</b> means "up to the end".</p>
     * @param string $encoding [optional] <p>Set the charset.</p>
     *
     * @return false|int
     *                   <p>Length of the leading segment, 0 if the examined string or the mask is empty
     *                   or if nothing matches.</p>
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        // "UTF-8" and "CP850" are used as-is, everything else is normalized first
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // cut out the requested part of the input first
        if ($offset !== 0 || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $part = self::substr($str, $offset, $length, $encoding);
            } elseif ($length === null) {
                $part = \mb_substr($str, $offset);
            } else {
                $part = \mb_substr($str, $offset, $length);
            }

            $str = (string) $part;
        }

        if ($str === '' || $mask === '') {
            return 0;
        }

        // match the longest run of mask characters anchored at the start of the string
        $found = [];
        $pattern = '/^' . self::rxClass($mask) . '+/u';
        if (!\preg_match($pattern, $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
9777
9778
    /**
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, strstr() returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      A sub-string,<br>or <strong>false</strong> if needle is not found
     *                      (or if haystack / needle is empty)
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // "UTF-8" and "CP850" are used as-is, everything else is normalized first
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $is_utf8 = $encoding === 'UTF-8';

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $is_utf8
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strstr($haystack, $needle, $before_needle);
        }

        // from here on only UTF-8 can be handled, so warn about anything else
        if (!$is_utf8 && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strstr()" can't handle other encodings
        if ($is_utf8 && self::$SUPPORT['intl'] === true) {
            $part_via_intl = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($part_via_intl !== false) {
                return $part_via_intl;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // capture (non-greedy) everything in front of the first occurrence of the needle
        $pattern = '/^(.*?)' . \preg_quote($needle, '/') . '/us';
        \preg_match($pattern, $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        $prefix = $match[1];
        if ($before_needle) {
            return $prefix;
        }

        // skip the prefix, so that the result starts with the needle
        return self::substr($haystack, (int) self::strlen($prefix));
    }
9886
9887
    /**
     * Byte-wise search for the first occurrence of a string within another.
     *
     * @param string $haystack      <p>
     *                              The string from which to get the first occurrence
     *                              of needle.
     *                              </p>
     * @param string $needle        <p>
     *                              The string to find in haystack.
     *                              </p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the first occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the first occurrence of needle to the end.
     *                              </p>
     *
     * @return false|string
     *                      <p>The portion of haystack,
     *                      or false if needle is not found (or if one of the strings is empty).</p>
     */
    public static function strstr_in_byte(
        string $haystack,
        string $needle,
        bool $before_needle = false
    ) {
        $has_input = $haystack !== '' && $needle !== '';
        if (!$has_input) {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-bit encoding ...
        // otherwise the native function is used
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
            : \strstr($haystack, $needle, $before_needle);
    }
9926
9927
    /**
     * Unicode transformation for case-less matching.
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str        <p>The input string.</p>
     * @param bool        $full       [optional] <p>
     *                                <b>true</b>, replace full case folding chars (default)<br>
     *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                                </p>
     * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding   [optional] <p>Set the charset.</p>
     * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower      [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                                is for some languages better ...</p>
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $clean_utf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            // remove invalid characters before the string is transformed
            $str = self::clean($str);
        }

        // first replace the special case-folding characters ...
        $str = self::fixStrCaseHelper($str, $lower, $full);

        // ... then convert the whole string into the requested case
        $to_lower = $lower === true;

        // simple case: no special language and UTF-8, so "mb_" can be used directly
        if ($lang === null && $encoding === 'UTF-8') {
            return $to_lower ? \mb_strtolower($str) : \mb_strtoupper($str);
        }

        return $to_lower
            ? self::strtolower($str, $encoding, false, $lang)
            : self::strtoupper($str, $encoding, false, $lang);
    }
9979
9980
    /**
     * Make a string lowercase.
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                           <p>The string being lowercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // the input is not type-hinted, so force a string first
        $str = (string) $str;
        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            // remove invalid characters before the string is converted
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length === true) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // simple case: no special language and UTF-8
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // no special language: always fallback via symfony polyfill
        if ($lang === null) {
            return \mb_strtolower($str, $encoding);
        }

        // a special language needs the transliterator from intl
        if (self::$SUPPORT['intl'] !== true) {
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            return \mb_strtolower($str, $encoding);
        }

        // lazy-load the list of available transliterators
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $transliterator_id = $lang . '-Lower';
        if (!\in_array($transliterator_id, self::$INTL_TRANSLITERATOR_LIST, true)) {
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

            // unknown language: use the generic transliterator instead
            $transliterator_id = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        return (string) \transliterator_transliterate($transliterator_id, $str);
    }
10049
10050
    /**
     * Make a string uppercase.
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                           <p>The string being uppercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length
     *                                                   (the string is pre-processed via fixStrCaseHelper())</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // the input is not type-hinted, so force a string first
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            // remove invalid characters before the string is converted
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length === true) {
            $str = self::fixStrCaseHelper($str, false);
        }

        // simple case: no special language and UTF-8
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // lazy-load the list of available transliterators
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Upper';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl is available here, only the requested language is unknown
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    // unknown language: use the generic transliterator instead
                    $language_code = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($language_code, $str);
            }

            // the "lang" parameter is ignored without intl, but the caller gets a warning
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
10119
10120
    /**
     * Translate characters or replace sub-strings.
     *
     * <p>If "to" is not an empty string, "from" and "to" are split via UTF8::str_split() and the
     * resulting elements are mapped one-to-one (the longer list is truncated to the length of the
     * shorter one).<br>
     * If "to" is an empty string and "from" is a string, every occurrence of "from" is removed.<br>
     * If "to" is an empty string and "from" is an array, "from" is passed as replace-pairs to \strtr().</p>
     *
     * @see http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from.</p>
     * @param string|string[] $to   [optional] <p>The string being translated to.</p>
     *
     * @throws \InvalidArgumentException <p>If "from" and "to" cannot be combined into replace-pairs.</p>
     *
     * @return string
     *                <p>This function returns a copy of str, translating all occurrences of each character in "from" to the
     *                corresponding character in "to".</p>
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // identical "from" and "to" cannot change anything
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            $from_chars = self::str_split($from);
            $to_chars = self::str_split($to);
            $count_from = \count($from_chars);
            $count_to = \count($to_chars);

            // array_combine() needs the same number of keys and values,
            // so the longer list is truncated
            if ($count_from > $count_to) {
                $from_chars = \array_slice($from_chars, 0, $count_to);
            } elseif ($count_from < $count_to) {
                $to_chars = \array_slice($to_chars, 0, $count_from);
            }

            // the split lists are kept in their own variables, so that they
            // are still available for the error message if combining fails
            $pairs = \array_combine($from_chars, $to_chars);
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            if ($pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from_chars, true) . ' | to: ' . \print_r($to_chars, true) . ')');
            }

            $from = $pairs;
        }

        // only reachable with a string if "to" is empty: remove all occurrences of "from"
        if (\is_string($from)) {
            return \str_replace($from, '', $str);
        }

        return \strtr($str, $from);
    }
10168
10169
    /**
     * Return the width of a string.
     *
     * <p>Without mbstring, every character inside the hard-coded "wide" code point ranges counts
     * as two columns and every other character as one.</p>
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return int
     */
    public static function strwidth(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): int {
        if ($str === '') {
            return 0;
        }

        // "UTF-8" and "CP850" are used as-is, everything else is normalized first
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        $is_utf8 = $encoding === 'UTF-8';

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $is_utf8 ? \mb_strwidth($str) : \mb_strwidth($str, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // the pattern below works on UTF-8 only
        if (!$is_utf8) {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // remove all wide characters and let preg_replace() count them
        $wide_count = 0;
        $narrow_chars = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wide_count);

        // wide characters take two columns, the remaining characters one column each
        return (2 * $wide_count) + (int) self::strlen($narrow_chars, 'UTF-8');
    }
10222
10223
    /**
10224
     * Get part of a string.
10225
     *
10226
     * @see http://php.net/manual/en/function.mb-substr.php
10227
     *
10228
     * @param string $str        <p>The string being checked.</p>
10229
     * @param int    $offset     <p>The first position used in str.</p>
10230
     * @param int    $length     [optional] <p>The maximum length of the returned string.</p>
10231
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
10232
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10233
     *
10234
     * @return false|string
10235
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
10236
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
10237
     *                      characters long, <b>FALSE</b> will be returned.
10238
     */
10239 172
    public static function substr(
10240
        string $str,
10241
        int $offset = 0,
10242
        int $length = null,
10243
        string $encoding = 'UTF-8',
10244
        bool $clean_utf8 = false
10245
    ) {
10246
        // empty string
10247 172
        if ($str === '' || $length === 0) {
10248 8
            return '';
10249
        }
10250
10251 168
        if ($clean_utf8 === true) {
10252
            // iconv and mbstring are not tolerant to invalid encoding
10253
            // further, their behaviour is inconsistent with that of PHP's substr
10254 2
            $str = self::clean($str);
10255
        }
10256
10257
        // whole string
10258 168
        if (!$offset && $length === null) {
10259 7
            return $str;
10260
        }
10261
10262 163
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
10263 19
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
10264
        }
10265
10266
        //
10267
        // fallback via mbstring
10268
        //
10269
10270 163
        if (self::$SUPPORT['mbstring'] === true) {
10271 161
            if ($encoding === 'UTF-8') {
10272 161
                if ($length === null) {
10273 64
                    return \mb_substr($str, $offset);
10274
                }
10275
10276 102
                return \mb_substr($str, $offset, $length);
10277
            }
10278
10279
            return self::substr($str, $offset, $length, $encoding);
10280
        }
10281
10282
        //
10283
        // fallback for binary || ascii only
10284
        //
10285
10286
        if (
10287 4
            $encoding === 'CP850'
10288
            ||
10289 4
            $encoding === 'ASCII'
10290
        ) {
10291
            if ($length === null) {
10292
                return \substr($str, $offset);
10293
            }
10294
10295
            return \substr($str, $offset, $length);
10296
        }
10297
10298
        // otherwise we need the string-length
10299 4
        $str_length = 0;
10300 4
        if ($offset || $length === null) {
10301 4
            $str_length = self::strlen($str, $encoding);
10302
        }
10303
10304
        // e.g.: invalid chars + mbstring not installed
10305 4
        if ($str_length === false) {
10306
            return false;
10307
        }
10308
10309
        // empty string
10310 4
        if ($offset === $str_length && !$length) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $length of type integer|null is loosely compared to false; this is ambiguous if the integer can be 0. You might want to explicitly use === null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
10311
            return '';
10312
        }
10313
10314
        // impossible
10315 4
        if ($offset && $offset > $str_length) {
10316
            return '';
10317
        }
10318
10319 4
        if ($length === null) {
10320 4
            $length = (int) $str_length;
10321
        } else {
10322 2
            $length = (int) $length;
10323
        }
10324
10325
        if (
10326 4
            $encoding !== 'UTF-8'
10327
            &&
10328 4
            self::$SUPPORT['mbstring'] === false
10329
        ) {
10330 2
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
10331
        }
10332
10333
        //
10334
        // fallback via intl
10335
        //
10336
10337
        if (
10338 4
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
10339
            &&
10340 4
            $offset >= 0 // grapheme_substr() can't handle negative offset
10341
            &&
10342 4
            self::$SUPPORT['intl'] === true
10343
        ) {
10344
            $return_tmp = \grapheme_substr($str, $offset, $length);
10345
            if ($return_tmp !== false) {
10346
                return $return_tmp;
10347
            }
10348
        }
10349
10350
        //
10351
        // fallback via iconv
10352
        //
10353
10354
        if (
10355 4
            $length >= 0 // "iconv_substr()" can't handle negative length
10356
            &&
10357 4
            self::$SUPPORT['iconv'] === true
10358
        ) {
10359
            $return_tmp = \iconv_substr($str, $offset, $length);
10360
            if ($return_tmp !== false) {
10361
                return $return_tmp;
10362
            }
10363
        }
10364
10365
        //
10366
        // fallback for ascii only
10367
        //
10368
10369 4
        if (ASCII::is_ascii($str)) {
10370
            return \substr($str, $offset, $length);
10371
        }
10372
10373
        //
10374
        // fallback via vanilla php
10375
        //
10376
10377
        // split to array, and remove invalid characters
10378 4
        $array = self::str_split($str);
10379
10380
        // extract the relevant part and join it to make a string again
10381 4
        return \implode('', \array_slice($array, $offset, $length));
10382
    }
10383
10384
    /**
     * Binary-safe comparison of two strings from an offset, up to a length of characters.
     *
     * <p>If an offset or a length is given, $str1 is first reduced to the requested slice and
     * $str2 is cut to the same number of characters; afterwards both strings are compared.</p>
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. If null, $str1 is used from
     *                                     the offset up to its end.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        if ($offset !== 0 || $length !== null) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }

                // cut the second string down to the character count of the slice
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);

                // the slice must be measured in the same encoding that is used for cutting,
                // otherwise the character count of $str1 is wrong for non UTF-8 input
                $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1, $encoding), $encoding);
            }
        }

        if ($case_insensitivity === true) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
10437
10438
    /**
     * Count the number of substring occurrences.
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack   <p>The string to search in.</p>
     * @param string   $needle     <p>The substring to search for.</p>
     * @param int      $offset     [optional] <p>The offset where to start counting.</p>
     * @param int|null $length     [optional] <p>
     *                             The maximum length after the specified offset to search for the
     *                             substring. A negative value is handed over to "mb_substr()" and
     *                             therefore stops that many characters before the end of the haystack.
     *                             </p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   <p>The number of occurrences, or <strong>false</strong> if the haystack or the needle is
     *                   empty, or if the length of the haystack could not be determined.</p>
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // nothing can be found inside a zero-length window
        if ($length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // invalid characters in front of the needle would distort the result,
            // so both strings are cleaned before searching
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // "$length === 0" was handled above, so every non-null length (also a negative one)
        // has to restrict the haystack; previously a negative length was ignored if $offset was 0
        if ($offset || $length !== null) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack, $encoding);
                if ($length_tmp === false) {
                    return false;
                }
                $length = (int) $length_tmp;
            }

            if ($encoding === 'UTF-8') {
                $haystack = (string) \mb_substr($haystack, $offset, $length);
            } else {
                $haystack = (string) \mb_substr($haystack, $offset, $length, $encoding);
            }
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_substr_count($haystack, $needle);
            }

            return \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback via vanilla php: count the (non-overlapping) regex matches
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
10519
10520
    /**
     * Count the number of substring occurrences, working on bytes instead of characters.
     *
     * @param string   $haystack <p>The string being checked.</p>
     * @param string   $needle   <p>The string being found.</p>
     * @param int      $offset   [optional] <p>The offset where to start counting.</p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring.
     *                           </p>
     *
     * @return false|int
     *                   <p>The number of times the needle substring occurs in the haystack string.</p>
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        $is_overloaded = self::$SUPPORT['mbstring_func_overload'] === true;

        // without function overloading the native function already works on bytes
        if (!$is_overloaded) {
            if ($length === null) {
                return \substr_count($haystack, $needle, $offset);
            }

            return \substr_count($haystack, $needle, $offset, $length);
        }

        if ($offset || $length !== null) {
            if ($length === null) {
                $measured = self::strlen($haystack);
                if ($measured === false) {
                    return false;
                }
                $length = (int) $measured;
            }

            if (
                $length !== 0
                &&
                $offset !== 0
                &&
                ($length + $offset) <= 0
                &&
                Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
            ) {
                return false;
            }

            $slice = \substr($haystack, $offset, $length);
            $haystack = $slice === false ? '' : (string) $slice;
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset
        return \mb_substr_count($haystack, $needle, 'CP850');
    }
10597
10598
    /**
     * Returns how often $substring occurs in $str.
     *
     * <p>The search is case-sensitive by default; pass false as $case_sensitive to ignore the case.</p>
     *
     * @param string $str            <p>The input string.</p>
     * @param string $substring      <p>The substring to search for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $case_sensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($str === '' || $substring === '') {
            return 0;
        }

        if ($encoding === 'UTF-8') {
            if (!$case_sensitive) {
                // compare the upper-case variants of both strings
                $str = \mb_strtoupper($str);
                $substring = \mb_strtoupper($substring);
            }

            return (int) \mb_substr_count($str, $substring);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (!$case_sensitive) {
            $str = self::strtocasefold($str, true, false, $encoding, null, false);
            $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
        }

        return (int) \mb_substr_count($str, $substring, $encoding);
    }
10643
10644
    /**
     * Strips the prefix $needle from the start of $haystack, ignoring the case of the prefix.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it does not start with it.</p>
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // an empty haystack or an empty needle leaves nothing to remove
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_istarts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
10668
10669
    /**
     * Get part of a string, processed in bytes.
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first position used in str.</p>
     * @param int|null $length [optional] <p>The maximum length of the returned string; null means "up to the
     *                         end of the string".</p>
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and <i>length</i>
     *                      parameters.</p>
     *                      <p>NOTE: older PHP versions return <b>FALSE</b> from "substr()" if <i>str</i> is
     *                      shorter than <i>offset</i>, PHP 8 returns an empty string instead.</p>
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        // do not emulate "no length" via a magic 32-bit number: that would cut
        // strings which are larger than 2 GiB on 64-bit systems
        if ($length === null) {
            return \substr($str, $offset);
        }

        return \substr($str, $offset, $length);
    }
10700
10701
    /**
     * Strips the suffix $needle from the end of $haystack, ignoring the case of the suffix.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it does not end with it.</p>
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // an empty haystack or an empty needle leaves nothing to remove
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_iends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // number of characters which remain in front of the suffix
        $keep = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $keep);
    }
10725
10726
    /**
     * Strips the prefix $needle from the start of $haystack.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it does not start with it.</p>
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // an empty haystack or an empty needle leaves nothing to remove
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_starts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
10750
10751
    /**
     * Replace text within a portion of a string.
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * <p>If $str is an array, the other arguments are expanded to one value per string and the
     * method calls itself for every entry (NOTE: in that mode a missing $length is treated as 0
     * for each entry).</p>
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given, then it will default to strlen(
     *                                     string ); i.e. end the replacing at the end of string. Of course, if
     *                                     length is zero then this function will have the effect of inserting
     *                                     replacement into string at the given start offset.</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \InvalidArgumentException if $offset or $length is an array while $str is not an array
     *
     * @return string|string[] The result string is returned. If string is an array then array is returned.
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str) === true) {
            $count = \count($str);

            // one replacement per input string
            $replacement = \is_array($replacement) === true
                ? \array_slice($replacement, 0, $count)
                : \array_pad([$replacement], $count, $replacement);

            // one offset per input string, non-integer offsets fall back to 0
            if (\is_array($offset) === true) {
                $offset = \array_slice($offset, 0, $count);
                foreach ($offset as $key => $candidate) {
                    if ((int) $candidate !== $candidate) {
                        $offset[$key] = 0;
                    }
                }
            } else {
                $offset = \array_pad([$offset], $count, $offset);
            }

            // one length per input string, non-integer lengths fall back to the number of strings
            if ($length === null) {
                $length = \array_fill(0, $count, 0);
            } elseif (\is_array($length) === true) {
                $length = \array_slice($length, 0, $count);
                foreach ($length as $key => $candidate) {
                    if ((int) $candidate !== $candidate) {
                        $length[$key] = $count;
                    }
                }
            } else {
                $length = \array_pad([$length], $count, $length);
            }

            // recursive call
            return \array_map([self::class, 'substr_replace'], $str, $replacement, $offset, $length);
        }

        // a single string can only take a single replacement: use the first one
        if (\is_array($replacement) === true) {
            $replacement = \count($replacement) > 0 ? $replacement[0] : '';
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length) === true) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset) === true) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $char_count = (int) self::strlen($str, $encoding);

            // clamp the offset into the range 0 ... $char_count
            if ($offset < 0) {
                $offset = (int) \max(0, $char_count + $offset);
            } elseif ($offset > $char_count) {
                $offset = $char_count;
            }

            // turn a negative or missing length into a usable positive one
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $char_count - $offset + $length);
            } elseif ($length === null || $length > $char_count) {
                $length = $char_count;
            }

            // the replaced part must not reach beyond the end of the string
            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $char_count) {
                $length = $char_count - $offset;
            }

            $head = (string) \mb_substr($str, 0, $offset, $encoding);
            /** @noinspection AdditionOperationOnArraysInspection */
            $tail = (string) \mb_substr($str, $offset + $length, $char_count - $offset - $length, $encoding);

            return $head . $replacement . $tail;
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            if ($length === null) {
                return \substr_replace($str, $replacement, $offset);
            }

            return \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into single characters
        \preg_match_all('/./us', $str, $str_chars);
        \preg_match_all('/./us', $replacement, $replacement_chars);

        if ($length === null) {
            $measured = self::strlen($str, $encoding);
            if ($measured === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $measured;
        }

        \array_splice($str_chars[0], $offset, $length, $replacement_chars[0]);

        return \implode('', $str_chars[0]);
    }
10901
10902
    /**
     * Strips the suffix $needle from the end of $haystack.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it does not end with it.</p>
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // an empty haystack or an empty needle leaves nothing to remove
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // byte-wise check: does the haystack end with the needle at all?
        if (\substr($haystack, -\strlen($needle)) !== $needle) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            $keep = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keep);
        }

        $keep = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $keep, $encoding);
    }
10943
10944
    /**
     * Returns a case swapped version of the string.
     *
     * <p>The fast path combines the lower-case and the upper-case variant of the string via a
     * byte-wise XOR. That only works while both variants have the same byte length as the input,
     * so every other input (e.g. "ⱥ" needs 3 bytes, but "Ⱥ" only 2 bytes) is swapped character
     * by character.</p>
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string each character's case swapped
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        $is_utf8 = $encoding === 'UTF-8';

        if ($is_utf8) {
            $lower = \mb_strtolower($str);
            $upper = \mb_strtoupper($str);
        } else {
            $lower = self::strtolower($str, $encoding);
            $upper = self::strtoupper($str, $encoding);
        }

        // fast path: "lower ^ upper ^ input" flips every cased byte, but the XOR of strings
        // is cut to the shortest operand, so it is only usable if all lengths are equal
        $byte_length = \strlen($str);
        if (\strlen($lower) === $byte_length && \strlen($upper) === $byte_length) {
            return (string) ($lower ^ $upper ^ $str);
        }

        //
        // slow path: swap the case of every single character
        //

        if ($is_utf8) {
            $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);
            if ($chars === false) {
                // invalid UTF-8 cannot be split, keep the previous behaviour
                return (string) ($lower ^ $upper ^ $str);
            }

            $swapped = '';
            foreach ($chars as $char) {
                $char_lower = \mb_strtolower($char);
                // a char which is already lower-case (or has no case) gets its upper-case variant
                $swapped .= $char_lower === $char ? \mb_strtoupper($char) : $char_lower;
            }

            return $swapped;
        }

        $char_count = self::strlen($str, $encoding);
        if ($char_count === false) {
            // the string cannot be measured, keep the previous behaviour
            return (string) ($lower ^ $upper ^ $str);
        }

        $swapped = '';
        for ($i = 0; $i < $char_count; ++$i) {
            $char = (string) self::substr($str, $i, 1, $encoding);
            $char_lower = self::strtolower($char, $encoding);
            $swapped .= $char_lower === $char ? self::strtoupper($char, $encoding) : $char_lower;
        }

        return $swapped;
    }
10971
10972
    /**
     * Checks whether symfony-polyfills are used.
     *
     * <p>A polyfill is assumed if a function of an extension exists although the extension itself
     * is not loaded; this is checked for "mbstring" (via "mb_strlen") and "iconv" (via "iconv").</p>
     *
     * @return bool
     *              <strong>true</strong> if in use, <strong>false</strong> otherwise
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstring_is_polyfilled = \extension_loaded('mbstring') === false && \function_exists('mb_strlen');
        $iconv_is_polyfilled = \extension_loaded('iconv') === false && \function_exists('iconv');

        return $mbstring_is_polyfilled || $iconv_is_polyfilled;
    }
10995
10996
    /**
     * Replaces every tab character in the string with a run of spaces.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length [optional] <p>Number of spaces which replace one tab. Default: 4</p>
     *
     * @return string
     */
    public static function tabs_to_spaces(string $str, int $tab_length = 4): string
    {
        $indent = \str_repeat(' ', $tab_length);

        return \str_replace("\t", $indent, $str);
    }
11014
11015
    /**
     * Converts the first character of each word in the string to uppercase
     * and all other chars to lowercase.
     *
     * <p>Without a language and without the "keep the string length" option the work is done by
     * "mb_convert_case()", otherwise it is delegated to "UTF8::str_titleize()".</p>
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>A string with all characters of $str being title-cased.</p>
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        // language specific rules and length preserving are handled by "str_titleize()"
        if ($lang !== null || $try_to_keep_the_string_length !== false) {
            return self::str_titleize(
                $str,
                null,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length,
                false
            );
        }

        if ($encoding === 'UTF-8') {
            return \mb_convert_case($str, \MB_CASE_TITLE);
        }

        return \mb_convert_case($str, \MB_CASE_TITLE, self::normalize_encoding($encoding, 'UTF-8'));
    }
11061
11062
    /**
     * Deprecated alias which only forwards its arguments to "UTF8::to_ascii()".
     *
     * @param string $str       <p>The input string.</p>
     * @param string $subst_chr [optional] <p>Handed over as second argument of "UTF8::to_ascii()". Default: ?</p>
     * @param bool   $strict    [optional] <p>Handed over as third argument of "UTF8::to_ascii()". Default: false</p>
     *
     * @return string
     *
     * @see UTF8::to_ascii()
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
     */
    public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
    {
        return self::to_ascii($str, $subst_chr, $strict);
    }
11081
11082
    /**
     * Deprecated alias which only forwards its argument to "UTF8::to_iso8859()".
     *
     * @param string|string[] $str <p>The input string or an array of strings.</p>
     *
     * @return string|string[]
     *
     * @see UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function toIso8859($str)
    {
        return self::to_iso8859($str);
    }
11096
11097
    /**
11098
     * alias for "UTF8::to_latin1()"
11099
     *
11100
     * @param string|string[] $str
11101
     *
11102
     * @return string|string[]
11103
     *
11104
     * @see UTF8::to_latin1()
11105
     * @deprecated <p>please use "UTF8::to_latin1()"</p>
11106
     */
11107 2
    public static function toLatin1($str)
    {
        // Deprecated camelCase alias: the work is done by "UTF8::to_latin1()".
        $converted = self::to_latin1($str);

        return $converted;
    }
11111
11112
    /**
11113
     * alias for "UTF8::to_utf8()"
11114
     *
11115
     * @param string|string[] $str
11116
     *
11117
     * @return string|string[]
11118
     *
11119
     * @see UTF8::to_utf8()
11120
     * @deprecated <p>please use "UTF8::to_utf8()"</p>
11121
     */
11122 2
    public static function toUTF8($str)
    {
        // Deprecated camelCase alias: the work is done by "UTF8::to_utf8()" (with its default options).
        $converted = self::to_utf8($str);

        return $converted;
    }
11126
11127
    /**
11128
     * Convert a string into ASCII.
11129
     *
11130
     * @param string $str     <p>The input string.</p>
11131
     * @param string $unknown [optional] <p>Character used if a character is unknown. (default is ?)</p>
11132
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
11133
     *                        performance</p>
11134
     *
11135
     * @return string
11136
     */
11137 37
    public static function to_ascii(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        // Thin facade: the transliteration itself is delegated to ASCII::to_transliterate().
        $ascii = ASCII::to_transliterate($str, $unknown, $strict);

        return $ascii;
    }
11144
11145
    /**
11146
     * @param mixed $str
11147
     *
11148
     * @return bool
11149
     */
11150 19
    public static function to_boolean($str): bool
    {
        // Interprets a loosely typed value as a boolean:
        // - '' (after trimming)                      => false
        // - 'true' / '1' / 'on' / 'yes' (any case)   => true
        // - 'false' / '0' / 'off' / 'no' (any case)  => false
        // - any other numeric string                 => true only if it is greater than zero
        // - everything else                          => true

        // Trim once up front, so that padded keywords such as " false " or "no\n" are
        // recognized instead of falling through to the generic "non-empty string" rule,
        // and so that numeric strings with trailing whitespace behave the same on
        // PHP 7 and PHP 8 ("is_numeric()" only accepts trailing whitespace since PHP 8).
        $str = \trim((string) $str);

        if ($str === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $map = [
            'true'  => true,
            '1'     => true,
            'on'    => true,
            'yes'   => true,
            'false' => false,
            '0'     => false,
            'off'   => false,
            'no'    => false,
        ];

        // all keys are lowercase, so one case-insensitive lookup covers the exact match, too
        $key = \strtolower($str);
        if (isset($map[$key])) {
            return $map[$key];
        }

        if (\is_numeric($str)) {
            return (float) $str > 0;
        }

        // a trimmed, non-empty, non-numeric string ('0' was already handled by the map)
        return true;
    }
11186
11187
    /**
11188
     * Convert given string to safe filename (and keep string case).
11189
     *
11190
     * @param string $str
11191
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
11192
     *                                  simply replaced with hyphen.
11193
     * @param string $fallback_char
11194
     *
11195
     * @return string
11196
     */
11197 1
    public static function to_filename(
        string $str,
        bool $use_transliterate = false,
        string $fallback_char = '-'
    ): string {
        // Thin facade: the filename sanitizing itself is delegated to ASCII::to_filename().
        $filename = ASCII::to_filename($str, $use_transliterate, $fallback_char);

        return $filename;
    }
11208
11209
    /**
11210
     * Convert a string into "ISO-8859"-encoding (Latin-1).
11211
     *
11212
     * @param string|string[] $str
11213
     *
11214
     * @return string|string[]
11215
     */
11216 8
    public static function to_iso8859($str)
    {
        // Arrays are converted element by element (recursively), the keys are kept.
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        // Scalars are cast to string; an empty string needs no decoding at all.
        $str = (string) $str;

        return $str === '' ? '' : self::utf8_decode($str);
    }
11233
11234
    /**
11235
     * alias for "UTF8::to_iso8859()"
11236
     *
11237
     * @param string|string[] $str
11238
     *
11239
     * @return string|string[]
11240
     *
11241
     * @see UTF8::to_iso8859()
11242
     */
11243 2
    public static function to_latin1($str)
    {
        // Alias: Latin-1 is handled by "UTF8::to_iso8859()".
        $converted = self::to_iso8859($str);

        return $converted;
    }
11247
11248
    /**
11249
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
11250
     *
11251
     * <ul>
11252
     * <li>It decodes UTF-8 codepoints and Unicode escape sequences.</li>
11253
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
11254
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you may need to use "UTF8::clean()" for this
11255
     * case.</li>
11256
     * </ul>
11257
     *
11258
     * @param string|string[] $str                        <p>Any string or array.</p>
11259
     * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
11260
     *
11261
     * @return string|string[] the UTF-8 encoded string
11262
     */
11263 41
    public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
    {
        // Arrays are converted element by element (recursively), the keys are kept.
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_utf8($value, $decode_html_entity_to_utf8);
            }

            return $str;
        }

        $str = (string) $str;
        if ($str === '') {
            return $str;
        }

        // Step 1: scan the input byte by byte; keep everything that already looks like a
        // UTF-8 sequence and hand every other non-ASCII byte to the convert-helper.
        $length = \strlen($str);
        $converted = '';
        $pos = 0;

        while ($pos < $length) {
            $lead = $str[$pos];

            if ($lead < "\xC0") {
                if (($lead & "\xC0") === "\x80") {
                    // 0x80-0xBF: continuation byte without a lead byte -> needs conversion
                    $converted .= self::to_utf8_convert_helper($lead);
                } else {
                    // 0x00-0x7F: plain ASCII -> no conversion needed
                    $converted .= $lead;
                }

                ++$pos;

                continue;
            }

            // total length of the sequence the lead byte announces (0 = can't start a sequence)
            if ($lead <= "\xDF") {
                $sequence_length = 2;
            } elseif ($lead <= "\xEF") {
                $sequence_length = 3;
            } elseif ($lead <= "\xF7") {
                $sequence_length = 4;
            } else {
                $sequence_length = 0;
            }

            // the sequence only counts as UTF-8 if all announced trail bytes
            // exist and are in the range 0x80-0xBF
            $sequence = $lead;
            $is_utf8_sequence = $sequence_length > 0 && $pos + $sequence_length <= $length;
            for ($offset = 1; $is_utf8_sequence && $offset < $sequence_length; ++$offset) {
                $trail = $str[$pos + $offset];
                if ($trail < "\x80" || $trail > "\xBF") {
                    $is_utf8_sequence = false;
                } else {
                    $sequence .= $trail;
                }
            }

            if ($is_utf8_sequence) {
                // yeah, almost sure it's UTF8 already
                $converted .= $sequence;
                $pos += $sequence_length;
            } else {
                // not valid UTF8 - convert only the lead byte and re-scan from the next byte
                $converted .= self::to_utf8_convert_helper($lead);
                ++$pos;
            }
        }

        // Step 2: decode unicode escape sequences + unicode surrogate pairs
        $converted = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // single "\uXXXX" escape
                    $code_point = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair, see http://unicode.org/faq/utf_bom.html#utf16-4
                    $high_surrogate = (int) \hexdec($matches[1]);
                    $low_surrogate = (int) \hexdec($matches[2]);
                    $code_point = ($high_surrogate << 10)
                                  + $low_surrogate
                                  + 0x10000
                                  - (0xD800 << 10)
                                  - 0xDC00;
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($code_point < 0x80) {
                    return (string) self::chr($code_point);
                }

                if ($code_point < 0xA0) {
                    /** @noinspection UnnecessaryCastingInspection */
                    return (string) self::chr(0xC0 | ($code_point >> 6))
                           . (string) self::chr(0x80 | ($code_point & 0x3F));
                }

                return self::decimal_to_chr($code_point);
            },
            $converted
        );

        // "preg_replace_callback()" reports an error by returning null
        if ($converted === null) {
            return '';
        }

        // Step 3 (optional): decode html-entities
        if ($decode_html_entity_to_utf8 === true) {
            $converted = self::html_entity_decode($converted);
        }

        return $converted;
    }
11381
11382
    /**
11383
     * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
11384
     *
11385
     * INFO: This is slower than "trim()"
11386
     *
11387
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
11388
     * but the check for ASCII (7-Bit) costs more time than we can save here.
11389
     *
11390
     * @param string      $str   <p>The string to be trimmed</p>
11391
     * @param string|null $chars [optional] <p>Optional characters to be stripped</p>
11392
     *
11393
     * @return string the trimmed string
11394
     */
11395 55
    public static function trim(string $str = '', string $chars = null): string
    {
        // Strips $chars (or whitespace, if no $chars are given) from both ends of $str,
        // using "mb_ereg_replace()" if mbstring is available and self::regex_replace() otherwise.
        if ($str === '') {
            return '';
        }

        // Compare explicitly: a plain truthiness check would treat the char list '0' as
        // "no chars given" and silently trim whitespace instead of zeros.
        if ($chars !== null && $chars !== '') {
            $chars = \preg_quote($chars, '/');
            // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
11415
11416
    /**
11417
     * Makes string's first char uppercase.
11418
     *
11419
     * @param string      $str                           <p>The input string.</p>
11420
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
11421
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
11422
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
11423
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
11424
     *
11425
     * @return string the resulting string
11426
     */
11427 69
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($str === '') {
            return '';
        }

        // optionally run the input through self::clean() first
        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        // without a language and without the "keep length" option the native mb-function is used
        $native_upper_is_enough = $lang === null && $try_to_keep_the_string_length === false;

        // fast path: no encoding normalization needed
        if ($encoding === 'UTF-8') {
            $first_char = (string) \mb_substr($str, 0, 1);
            $remainder = (string) \mb_substr($str, 1);

            if ($native_upper_is_enough === true) {
                return \mb_strtoupper($first_char) . $remainder;
            }

            return self::strtoupper(
                $first_char,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            ) . $remainder;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $remainder = (string) self::substr($str, 1, null, $encoding);

        if ($native_upper_is_enough === true) {
            $first_char_upper = \mb_strtoupper(
                (string) \mb_substr($str, 0, 1, $encoding),
                $encoding
            );
        } else {
            $first_char_upper = self::strtoupper(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );
        }

        return $first_char_upper . $remainder;
    }
11485
11486
    /**
11487
     * alias for "UTF8::ucfirst()"
11488
     *
11489
     * @param string $str
11490
     * @param string $encoding
11491
     * @param bool   $clean_utf8
11492
     *
11493
     * @return string
11494
     *
11495
     * @see UTF8::ucfirst()
11496
     */
11497 1
    public static function ucword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // Alias: forwards to "UTF8::ucfirst()" and therefore only changes the first char of $str.
        $result = self::ucfirst($str, $encoding, $clean_utf8);

        return $result;
    }
11504
11505
    /**
11506
     * Uppercase for all words in the string.
11507
     *
11508
     * @param string   $str        <p>The input string.</p>
11509
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
11510
     * @param string   $char_list  [optional] <p>Additional chars that belong to words and do not start a new
11511
     *                             word.</p>
11512
     * @param string   $encoding   [optional] <p>Set the charset.</p>
11513
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
11514
     *
11515
     * @return string
11516
     */
11517 8
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // Uppercases the first char of every word in $str; words listed in $exceptions are
        // left untouched and the chars from $char_list are passed on to self::str_to_words().

        // Compare explicitly: "!$str" would also swallow the valid input '0'
        // (same empty-check as in "UTF8::ucfirst()").
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        // The native function is only an option if neither extra word-chars nor exceptions
        // were given; strict comparison, so that a char list of '0' is not ignored.
        $use_php_default_functions = ($char_list . \implode('', $exceptions)) === '';

        if (
            $use_php_default_functions === true
            &&
            ASCII::is_ascii($str) === true
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = \count($exceptions) > 0;

        foreach ($words as &$word) {
            // '' and '0' have no letter that could be uppercased
            if (!$word) {
                continue;
            }

            if (
                $use_exceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $word = self::ucfirst($word, $encoding);
            }
        }
        // break the reference, so that later writes to $word can't change the last element
        unset($word);

        return \implode('', $words);
    }
11566
11567
    /**
11568
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
11569
     *
11570
     * e.g:
11571
     * 'test+test'                     => 'test test'
11572
     * 'D&#252;sseldorf'               => 'Düsseldorf'
11573
     * 'D%FCsseldorf'                  => 'Düsseldorf'
11574
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
11575
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
11576
     * 'Düsseldorf'                   => 'Düsseldorf'
11577
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
11578
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
11579
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
11580
     *
11581
     * @param string $str          <p>The input string.</p>
11582
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
11583
     *
11584
     * @return string
11585
     */
11586 4
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // Without an entity-, percent-, plus- or "\u"-marker there is nothing to decode,
        // so only the simple UTF-8 fix is applied.
        $has_encoded_part = false;
        foreach (['&', '%', '+', '\u'] as $marker) {
            if (\strpos($str, $marker) !== false) {
                $has_encoded_part = true;

                break;
            }
        }

        if ($has_encoded_part === false) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // Decode once, or (with $multi_decode) until a pass no longer changes the string.
        do {
            $previous_pass = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $entities_decoded = self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            );

            $str = self::fix_simple_utf8(\urldecode($entities_decoded));
        } while ($multi_decode === true && $previous_pass !== $str);

        return $str;
    }
11624
11625
    /**
11626
     * Returns an array with "urlencoded"-win1252 -> UTF-8
11627
     *
11628
     * @return string[]
11629
     *
11630
     * @deprecated <p>please use the "UTF8::urldecode()" function to decode a string</p>
11631
     */
11632 2
    public static function urldecode_fix_win1252_chars(): array
11633
    {
11634
        return [
11635 2
            '%20' => ' ',
11636
            '%21' => '!',
11637
            '%22' => '"',
11638
            '%23' => '#',
11639
            '%24' => '$',
11640
            '%25' => '%',
11641
            '%26' => '&',
11642
            '%27' => "'",
11643
            '%28' => '(',
11644
            '%29' => ')',
11645
            '%2A' => '*',
11646
            '%2B' => '+',
11647
            '%2C' => ',',
11648
            '%2D' => '-',
11649
            '%2E' => '.',
11650
            '%2F' => '/',
11651
            '%30' => '0',
11652
            '%31' => '1',
11653
            '%32' => '2',
11654
            '%33' => '3',
11655
            '%34' => '4',
11656
            '%35' => '5',
11657
            '%36' => '6',
11658
            '%37' => '7',
11659
            '%38' => '8',
11660
            '%39' => '9',
11661
            '%3A' => ':',
11662
            '%3B' => ';',
11663
            '%3C' => '<',
11664
            '%3D' => '=',
11665
            '%3E' => '>',
11666
            '%3F' => '?',
11667
            '%40' => '@',
11668
            '%41' => 'A',
11669
            '%42' => 'B',
11670
            '%43' => 'C',
11671
            '%44' => 'D',
11672
            '%45' => 'E',
11673
            '%46' => 'F',
11674
            '%47' => 'G',
11675
            '%48' => 'H',
11676
            '%49' => 'I',
11677
            '%4A' => 'J',
11678
            '%4B' => 'K',
11679
            '%4C' => 'L',
11680
            '%4D' => 'M',
11681
            '%4E' => 'N',
11682
            '%4F' => 'O',
11683
            '%50' => 'P',
11684
            '%51' => 'Q',
11685
            '%52' => 'R',
11686
            '%53' => 'S',
11687
            '%54' => 'T',
11688
            '%55' => 'U',
11689
            '%56' => 'V',
11690
            '%57' => 'W',
11691
            '%58' => 'X',
11692
            '%59' => 'Y',
11693
            '%5A' => 'Z',
11694
            '%5B' => '[',
11695
            '%5C' => '\\',
11696
            '%5D' => ']',
11697
            '%5E' => '^',
11698
            '%5F' => '_',
11699
            '%60' => '`',
11700
            '%61' => 'a',
11701
            '%62' => 'b',
11702
            '%63' => 'c',
11703
            '%64' => 'd',
11704
            '%65' => 'e',
11705
            '%66' => 'f',
11706
            '%67' => 'g',
11707
            '%68' => 'h',
11708
            '%69' => 'i',
11709
            '%6A' => 'j',
11710
            '%6B' => 'k',
11711
            '%6C' => 'l',
11712
            '%6D' => 'm',
11713
            '%6E' => 'n',
11714
            '%6F' => 'o',
11715
            '%70' => 'p',
11716
            '%71' => 'q',
11717
            '%72' => 'r',
11718
            '%73' => 's',
11719
            '%74' => 't',
11720
            '%75' => 'u',
11721
            '%76' => 'v',
11722
            '%77' => 'w',
11723
            '%78' => 'x',
11724
            '%79' => 'y',
11725
            '%7A' => 'z',
11726
            '%7B' => '{',
11727
            '%7C' => '|',
11728
            '%7D' => '}',
11729
            '%7E' => '~',
11730
            '%7F' => '',
11731
            '%80' => '`',
11732
            '%81' => '',
11733
            '%82' => '‚',
11734
            '%83' => 'ƒ',
11735
            '%84' => '„',
11736
            '%85' => '…',
11737
            '%86' => '†',
11738
            '%87' => '‡',
11739
            '%88' => 'ˆ',
11740
            '%89' => '‰',
11741
            '%8A' => 'Š',
11742
            '%8B' => '‹',
11743
            '%8C' => 'Œ',
11744
            '%8D' => '',
11745
            '%8E' => 'Ž',
11746
            '%8F' => '',
11747
            '%90' => '',
11748
            '%91' => '‘',
11749
            '%92' => '’',
11750
            '%93' => '“',
11751
            '%94' => '”',
11752
            '%95' => '•',
11753
            '%96' => '–',
11754
            '%97' => '—',
11755
            '%98' => '˜',
11756
            '%99' => '™',
11757
            '%9A' => 'š',
11758
            '%9B' => '›',
11759
            '%9C' => 'œ',
11760
            '%9D' => '',
11761
            '%9E' => 'ž',
11762
            '%9F' => 'Ÿ',
11763
            '%A0' => '',
11764
            '%A1' => '¡',
11765
            '%A2' => '¢',
11766
            '%A3' => '£',
11767
            '%A4' => '¤',
11768
            '%A5' => '¥',
11769
            '%A6' => '¦',
11770
            '%A7' => '§',
11771
            '%A8' => '¨',
11772
            '%A9' => '©',
11773
            '%AA' => 'ª',
11774
            '%AB' => '«',
11775
            '%AC' => '¬',
11776
            '%AD' => '',
11777
            '%AE' => '®',
11778
            '%AF' => '¯',
11779
            '%B0' => '°',
11780
            '%B1' => '±',
11781
            '%B2' => '²',
11782
            '%B3' => '³',
11783
            '%B4' => '´',
11784
            '%B5' => 'µ',
11785
            '%B6' => '¶',
11786
            '%B7' => '·',
11787
            '%B8' => '¸',
11788
            '%B9' => '¹',
11789
            '%BA' => 'º',
11790
            '%BB' => '»',
11791
            '%BC' => '¼',
11792
            '%BD' => '½',
11793
            '%BE' => '¾',
11794
            '%BF' => '¿',
11795
            '%C0' => 'À',
11796
            '%C1' => 'Á',
11797
            '%C2' => 'Â',
11798
            '%C3' => 'Ã',
11799
            '%C4' => 'Ä',
11800
            '%C5' => 'Å',
11801
            '%C6' => 'Æ',
11802
            '%C7' => 'Ç',
11803
            '%C8' => 'È',
11804
            '%C9' => 'É',
11805
            '%CA' => 'Ê',
11806
            '%CB' => 'Ë',
11807
            '%CC' => 'Ì',
11808
            '%CD' => 'Í',
11809
            '%CE' => 'Î',
11810
            '%CF' => 'Ï',
11811
            '%D0' => 'Ð',
11812
            '%D1' => 'Ñ',
11813
            '%D2' => 'Ò',
11814
            '%D3' => 'Ó',
11815
            '%D4' => 'Ô',
11816
            '%D5' => 'Õ',
11817
            '%D6' => 'Ö',
11818
            '%D7' => '×',
11819
            '%D8' => 'Ø',
11820
            '%D9' => 'Ù',
11821
            '%DA' => 'Ú',
11822
            '%DB' => 'Û',
11823
            '%DC' => 'Ü',
11824
            '%DD' => 'Ý',
11825
            '%DE' => 'Þ',
11826
            '%DF' => 'ß',
11827
            '%E0' => 'à',
11828
            '%E1' => 'á',
11829
            '%E2' => 'â',
11830
            '%E3' => 'ã',
11831
            '%E4' => 'ä',
11832
            '%E5' => 'å',
11833
            '%E6' => 'æ',
11834
            '%E7' => 'ç',
11835
            '%E8' => 'è',
11836
            '%E9' => 'é',
11837
            '%EA' => 'ê',
11838
            '%EB' => 'ë',
11839
            '%EC' => 'ì',
11840
            '%ED' => 'í',
11841
            '%EE' => 'î',
11842
            '%EF' => 'ï',
11843
            '%F0' => 'ð',
11844
            '%F1' => 'ñ',
11845
            '%F2' => 'ò',
11846
            '%F3' => 'ó',
11847
            '%F4' => 'ô',
11848
            '%F5' => 'õ',
11849
            '%F6' => 'ö',
11850
            '%F7' => '÷',
11851
            '%F8' => 'ø',
11852
            '%F9' => 'ù',
11853
            '%FA' => 'ú',
11854
            '%FB' => 'û',
11855
            '%FC' => 'ü',
11856
            '%FD' => 'ý',
11857
            '%FE' => 'þ',
11858
            '%FF' => 'ÿ',
11859
        ];
11860
    }
11861
11862
    /**
11863
     * Decodes a UTF-8 string to ISO-8859-1.
11864
     *
11865
     * @param string $str             <p>The input string.</p>
11866
     * @param bool   $keep_utf8_chars
11867
     *
11868
     * @return string
11869
     */
11870 14
    public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // keep the untouched input, it is needed for the "$keep_utf8_chars" comparison below
        $original = $str;
        $byte_count = \strlen($str);

        // lazy-load the lookup tables
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $placeholder = '?';

        // The string is rewritten in place: $read walks over the input bytes, $write over the
        // output bytes. $write never overtakes $read, so no unread byte is overwritten.
        $read = 0;
        $write = 0;
        while ($read < $byte_count) {
            $high_nibble = $str[$read] & "\xF0";

            if ($high_nibble === "\xC0" || $high_nibble === "\xD0") {
                // 2-byte sequence: 5 bits from the lead byte + 6 bits from the trail byte
                $lead_bits = self::$ORD[$str[$read] & "\x1F"];
                ++$read;
                $trail_bits = self::$ORD[$str[$read] & "\x3F"];

                $c = ($lead_bits << 6) | $trail_bits;
                $str[$write] = $c < 256 ? self::$CHR[$c] : $placeholder;
            } elseif ($high_nibble === "\xF0") {
                // 4-byte sequence: replaced by the placeholder, 3 trail bytes are skipped
                $str[$write] = $placeholder;
                $read += 3;
            } elseif ($high_nibble === "\xE0") {
                // 3-byte sequence: replaced by the placeholder, 2 trail bytes are skipped
                $str[$write] = $placeholder;
                $read += 2;
            } else {
                // every other byte is copied as it is
                $str[$write] = $str[$read];
            }

            ++$read;
            ++$write;
        }

        // cut off the leftover bytes behind the last written position
        $decoded = \substr($str, 0, $write);
        if ($decoded === false) {
            $decoded = '';
        }

        // return the input unchanged, if the decoded string isn't shorter (measured with self::strlen())
        if (
            $keep_utf8_chars === true
            &&
            (int) self::strlen($decoded) >= (int) self::strlen($original)
        ) {
            return $original;
        }

        return $decoded;
    }
11931
11932
    /**
11933
     * Encodes an ISO-8859-1 string to UTF-8.
11934
     *
11935
     * @param string $str <p>The input string.</p>
11936
     *
11937
     * @return string
11938
     */
11939 14
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // the global function does the work; this method only normalizes its result
        $encoded = \utf8_encode($str);

        // the polyfill maybe return false
        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        return $encoded === false ? '' : $encoded;
    }
11956
11957
    /**
11958
     * fix -> utf8-win1252 chars
11959
     *
11960
     * @param string $str <p>The input string.</p>
11961
     *
11962
     * @return string
11963
     *
11964
     * @deprecated <p>please use "UTF8::fix_simple_utf8()"</p>
11965
     */
11966 2
    public static function utf8_fix_win1252_chars(string $str): string
    {
        // Deprecated alias: the work is done by "UTF8::fix_simple_utf8()".
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
11970
11971
    /**
11972
     * Returns an array with all utf8 whitespace characters.
11973
     *
11974
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
11975
     *
11976
     * @return string[]
11977
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
11978
     *                  as defined in above URL
11979
     */
11980 2
    public static function whitespace_table(): array
    {
        // Plain accessor for the static whitespace lookup table of this class.
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
11984
11985
    /**
11986
     * Limit the number of words in a string.
11987
     *
11988
     * @param string $str        <p>The input string.</p>
11989
     * @param int    $limit      <p>The limit of words as integer.</p>
11990
     * @param string $str_add_on <p>Replacement for the stripped string.</p>
11991
     *
11992
     * @return string
11993
     */
11994 2
    public static function words_limit(
        string $str,
        int $limit = 100,
        string $str_add_on = '…'
    ): string {
        // nothing to return for an empty string or a limit below one word
        if ($limit < 1 || $str === '') {
            return '';
        }

        // optional leading whitespace, followed by 1 to $limit "word + trailing whitespace" groups
        $pattern = '/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u';
        \preg_match($pattern, $str, $matches);

        // no match at all -> the string is returned as it is
        if (!isset($matches[0])) {
            return $str;
        }

        // the match already covers the whole string -> nothing was cut off, so no add-on
        $leading_words = $matches[0];
        if (\mb_strlen($str) === (int) \mb_strlen($leading_words)) {
            return $str;
        }

        return \rtrim($leading_words) . $str_add_on;
    }
12015
12016
    /**
     * Wraps a string to a given number of characters.
     *
     * The native "wordwrap()" is byte based, so it is not applied to the input directly.
     * Instead a mask is built that contains exactly one ASCII byte per input character
     * (" " for a space, "#" for an already existing break, "?" for anything else). The
     * mask is wrapped by the native function and the positions of the "#" markers in
     * the wrapped mask are then used to re-assemble the original characters.
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @return string
     *                <p>
     *                The given string wrapped at the specified column, or an empty string
     *                if $str or $break is empty.
     *                </p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($str === '' || $break === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }

        // $chars: every character of the input, an existing break is stored as one element
        // $mask:  one ASCII byte per element of $chars
        $chars = [];
        $mask = '';
        foreach ($segments as $segment_index => $segment) {
            if ($segment_index) {
                // every segment but the first was preceded by a break in the input
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= ($char === ' ') ? ' ' : '?';
            }
        }

        $mask = \wordwrap($mask, $width, '#', $cut);
        $mask_length = \mb_strlen($mask);

        $result = '';
        $char_index = 0;
        $mask_index = -1;
        $break_pos = -1;
        while (($break_pos = \mb_strpos($mask, '#', $break_pos + 1)) !== false) {
            // copy all characters in front of the current break marker
            for (++$mask_index; $mask_index < $break_pos; ++$mask_index) {
                $result .= $chars[$char_index];
                unset($chars[$char_index++]);

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($mask_index > $mask_length) {
                    break 2;
                }
            }

            // the marker replaced an existing break or a space: drop that character
            if (
                $chars[$char_index] === $break
                ||
                $chars[$char_index] === ' '
            ) {
                unset($chars[$char_index++]);
            }

            $result .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($break_pos > $mask_length) {
                break;
            }
        }

        // everything behind the last break marker
        return $result . \implode('', $chars);
    }
12102
12103
    /**
     * Line-wrap the string after $width, but split the string by "$delimiter" first,
     * so that every part is wrapped on its own via "UTF8::wordwrap()".
     *
     * The wrapped parts are joined again with $delimiter (or "\n" if no delimiter was given).
     *
     * @param string      $str             <p>The input string.</p>
     * @param int         $width           [optional] <p>The column width.</p>
     * @param string      $break           [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool        $cut             [optional] <p>
     *                                     If the cut is set to true, the string is
     *                                     always wrapped at or before the specified width. So if you have
     *                                     a word that is larger than the given width, it is broken apart.
     *                                     </p>
     * @param bool        $add_final_break [optional] <p>
     *                                     If this flag is true, then the method will add a $break at the end
     *                                     of the result string.
     *                                     </p>
     * @param string|null $delimiter       [optional] <p>
     *                                     You can change the default behavior, where we split the string by newline
     *                                     ("\r\n", "\r" or "\n").
     *                                     </p>
     *
     * @return string
     */
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $add_final_break = true,
        string $delimiter = null
    ): string {
        $lines = $delimiter === null
            ? \preg_split('/\\r\\n|\\r|\\n/', $str)
            : \explode($delimiter, $str);

        $wrapped_lines = [];
        if ($lines !== false) {
            foreach ($lines as $line) {
                $wrapped_lines[] = self::wordwrap($line, $width, $break, $cut);
            }
        }

        $glue = $delimiter ?? "\n";

        return \implode($glue, $wrapped_lines) . ($add_final_break ? $break : '');
    }
12154
12155
    /**
     * Returns the list of Unicode White Space characters.
     *
     * @return string[]
     *                  <p>An array with numeric code point as key and White Space Character as value.</p>
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
12164
12165
    /**
     * Decode decimal numeric HTML entities (e.g. "&#8364;") into characters of the given encoding.
     *
     * Only entities with 2 to 6 digits are handled. Entities that decode to a double
     * or a single quote are left encoded, as are entities that could not be converted.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding <p>The target encoding passed to "mb_convert_encoding()".</p>
     *
     * @return string
     */
    private static function html_entity_decode_helper(string $str, string $encoding): string
    {
        return (string) \preg_replace_callback(
            "/&#\d{2,6};/",
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $decoded = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

                // "mb_convert_encoding()" (or a polyfill of it) may return false on failure,
                // which must not leak through the "string" return type: keep the entity as it is.
                if (!\is_string($decoded)) {
                    return $matches[0];
                }

                // Quotes stay encoded.
                if ($decoded === '"' || $decoded === "'") {
                    return $matches[0];
                }

                return $decoded;
            },
            $str
        );
    }
12191
12192
    /**
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
     *
     * The string is scanned byte by byte with a small state machine. Overlong
     * (non-shortest form) encodings, 5 and 6 octet sequences, surrogates, code points
     * above U+10FFFF, stray continuation bytes and sequences that are cut off (in the
     * middle or at the end of the string) are all rejected.
     *
     * @see http://hsivonen.iki.fi/php-utf8/
     *
     * @param string $str    <p>The string to be checked.</p>
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     *              <p>True for an empty string and for well-formed UTF-8, false otherwise.</p>
     */
    private static function is_utf8_string(string $str, bool $strict = false): bool
    {
        if ($str === '') {
            return true;
        }

        if ($strict === true) {
            $is_binary = self::is_binary($str, true);

            if ($is_binary && self::is_utf16($str, false) !== false) {
                return false;
            }

            if ($is_binary && self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        // NOTE(review): this shortcut is taken when "pcre_utf8_support()" is NOT true, although
        // it relies on the "/u" modifier itself - the condition is kept as it is, please confirm
        // against the implementation of "pcre_utf8_support()".
        if (self::pcre_utf8_support() !== true) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^./us', $str) === 1;
        }

        $mState = 0; // number of octets still expected for the current character
        $mUcs4 = 0;  // code point that is assembled from the current sequence
        $mBytes = 1; // total number of octets of the current sequence

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $len = \strlen($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $len; ++$i) {
            $in = self::$ORD[$str[$i]];

            if ($mState === 0) {
                // When mState is zero we expect either a US-ASCII character or the
                // first octet of a multi-octet sequence.
                if ((0x80 & $in) === 0) {
                    // US-ASCII, pass straight through.
                    $mBytes = 1;
                } elseif ((0xE0 & $in) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $mUcs4 = ($in & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;
                } elseif ((0xF0 & $in) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $mUcs4 = ($in & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;
                } elseif ((0xF8 & $in) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $mUcs4 = ($in & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;
                } elseif ((0xFC & $in) === 0xF8) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = ($in & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;
                } elseif ((0xFE & $in) === 0xFC) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = ($in & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;
                } else {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }
            } elseif ((0xC0 & $in) === 0x80) {
                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence: this is a legal continuation octet.
                $shift = ($mState - 1) * 6;
                $mUcs4 |= ($in & 0x0000003F) << $shift;

                // End of the multi-octet sequence: mUcs4 now contains the final
                // Unicode code point.
                if (--$mState === 0) {
                    // Check for illegal sequences and code points.
                    if (
                        // From Unicode 3.1, non-shortest form is illegal
                        ($mBytes === 2 && $mUcs4 < 0x0080)
                        ||
                        ($mBytes === 3 && $mUcs4 < 0x0800)
                        ||
                        ($mBytes === 4 && $mUcs4 < 0x10000)
                        ||
                        ($mBytes > 4)
                        ||
                        // From Unicode 3.2, surrogate characters are illegal.
                        (($mUcs4 & 0xFFFFF800) === 0xD800)
                        ||
                        // Code points outside the Unicode range are illegal.
                        ($mUcs4 > 0x10FFFF)
                    ) {
                        return false;
                    }

                    // reset the state for the next character
                    $mState = 0;
                    $mUcs4 = 0;
                    $mBytes = 1;
                }
            } else {
                // ((0xC0 & (*in) != 0x80) && (mState != 0))
                // Incomplete multi-octet sequence.
                return false;
            }
        }

        // A non-zero state means that the string ended in the middle of a multi-octet
        // sequence (e.g. a lone "\xC3"), which is not valid UTF-8 either.
        return $mState === 0;
    }
12337
12338
    /**
     * Replace case-folding special cases via plain string replacement.
     *
     * The "upper" / "lower" lists of the common case-fold table are always applied;
     * the table from the "caseFolding_full" data file is applied in addition if
     * $use_full_case_fold is set.
     *
     * @param string $str
     * @param bool   $use_lowercase      <p>Use uppercase by default, otherwise use lowercase.</p>
     * @param bool   $use_full_case_fold <p>Convert not only common cases.</p>
     *
     * @return string
     */
    private static function fixStrCaseHelper(
        string $str,
        $use_lowercase = false,
        $use_full_case_fold = false
    ): string {
        // The full table is loaded lazily, once per request.
        static $FULL_CASE_FOLD = null;

        // Only a strict "true" switches to lowercase.
        $to_lowercase = ($use_lowercase === true);

        $common_upper = self::$COMMON_CASE_FOLD['upper'];
        $common_lower = self::$COMMON_CASE_FOLD['lower'];

        $str = $to_lowercase
            ? \str_replace($common_upper, $common_lower, $str)
            : \str_replace($common_lower, $common_upper, $str);

        if (!$use_full_case_fold) {
            return $str;
        }

        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        // Index 0 is replaced by index 1 for lowercase, the other way around otherwise.
        $search_index = $to_lowercase ? 0 : 1;
        $replace_index = $to_lowercase ? 1 : 0;

        return \str_replace($FULL_CASE_FOLD[$search_index], $FULL_CASE_FOLD[$replace_index], $str);
    }
12382
12383
    /**
     * Load a data table from "/data/*.php" (relative to this file).
     *
     * @param string $file <p>The file name without the directory and without the ".php" extension.</p>
     *
     * @return array
     *               <p>The value returned by the included file.</p>
     */
    private static function getData(string $file): array
    {
        $data_file = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $data_file;
    }
12397
12398
    /**
     * Fill the emoji lookup caches on first use.
     *
     * @return true|null
     *                   <p>True if the caches were built by this call, null if they already existed.</p>
     */
    private static function initEmojiData()
    {
        // Already initialized.
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        // Order the table by key length (in bytes), longest keys first.
        \uksort(
            self::$EMOJI,
            static function (string $a, string $b): int {
                return \strlen($b) <=> \strlen($a);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = \array_values(self::$EMOJI);

        // One placeholder per key, built from the CRC32 checksum of the key.
        foreach (self::$EMOJI_KEYS_CACHE as $emoji_key) {
            $checksum = (string) \crc32($emoji_key);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev($checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
12428
12429
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * @return bool
     *              <p>
     *              True if the "MB_OVERLOAD_STRING" constant exists and its bit is set in the
     *              "mbstring.func_overload" INI value.
     *              </p>
     */
    private static function mbstring_overloaded(): bool
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $func_overload = (int) @\ini_get('mbstring.func_overload');

        /** @noinspection PhpComposerExtensionStubsInspection */
        return ($func_overload & \MB_OVERLOAD_STRING) !== 0;
    }
12446
12447
    /**
     * Filter a list of strings; the result is re-indexed.
     *
     * @param array    $strings
     * @param bool     $remove_empty_values <p>Drop strings that are empty after "trim()".</p>
     * @param int|null $remove_short_values <p>
     *                                      Drop strings with a length (in characters) lower than
     *                                      or equal to this value, null disables the check.
     *                                      </p>
     *
     * @return array
     */
    private static function reduce_string_array(
        array $strings,
        bool $remove_empty_values,
        int $remove_short_values = null
    ): array {
        $kept = [];

        foreach ($strings as $string) {
            $is_too_short = $remove_short_values !== null
                            &&
                            \mb_strlen($string) <= $remove_short_values;

            // The emptiness check is only evaluated for strings that passed the length check.
            if (
                $is_too_short
                ||
                ($remove_empty_values === true && \trim($string) === '')
            ) {
                continue;
            }

            $kept[] = $string;
        }

        return $kept;
    }
12484
12485
    /**
     * Build a regular expression fragment that matches any one of the given characters.
     *
     * Every character of $s is added to a bracket expression that starts with the raw
     * content of $class; a "-" is moved to the front of the bracket expression.
     * Characters of up to two bytes are escaped with "preg_quote()". Elements that are
     * longer than two bytes and do not count as a single character (checked with
     * "UTF8::strlen()") become separate alternatives of a non-capturing group instead.
     *
     * The results are cached per ($class, $s) pair for the lifetime of the request.
     *
     * @param string $s     <p>The characters that should be matched.</p>
     * @param string $class <p>Raw content that is put at the start of the bracket expression.</p>
     *
     * @return string
     *                <p>A bracket expression like "[...]" or a group like "(?:[...]|...)".</p>
     */
    private static function rxClass(string $s, string $class = ''): string
    {
        static $RX_CLASS_CACHE = [];

        // Two-level cache: a concatenated key ($s . $class) would be ambiguous,
        // e.g. ("a", "-z") and ("", "a-z") would share one cache entry.
        if (isset($RX_CLASS_CACHE[$class][$s])) {
            return $RX_CLASS_CACHE[$class][$s];
        }

        $class_array = [$class];

        // The loop variable must neither be a reference (the result of a function call
        // is no reference target) nor overwrite the $s parameter.
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                $class_array[0] = '-' . $class_array[0];
            } elseif (!isset($char[2])) {
                // at most two bytes
                $class_array[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                $class_array[0] .= $char;
            } else {
                $class_array[] = $char;
            }
        }

        // Compare against the empty string explicitly, a plain truthiness check would skip "0".
        if ($class_array[0] !== '') {
            $class_array[0] = '[' . $class_array[0] . ']';
        }

        if (\count($class_array) === 1) {
            $return = $class_array[0];
        } else {
            $return = '(?:' . \implode('|', $class_array) . ')';
        }

        $RX_CLASS_CACHE[$class][$s] = $return;

        return $return;
    }
12533
12534
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * The input is split by $delimiter and the first character of every part is
     * upper-cased via "UTF8::ucfirst()", except for:
     * - parts that are equal to one of the known name particles (e.g. "von", "de"),
     * - parts that start with one of the known prefixes (e.g. "mc", "d'"),
     * - parts that start with one of the name particles, if the delimiter is "-".
     *
     * @param string $names     <p>The name(s), separated by $delimiter.</p>
     * @param string $delimiter <p>The separator that is used to split and to re-join the parts.</p>
     * @param string $encoding  <p>The encoding used for the prefix checks.</p>
     *
     * @return string
     */
    private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
    {
        $name_helper_array = \explode($delimiter, $names);
        if ($name_helper_array === false) {
            return '';
        }

        $special_cases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // For "-" separated parts the name particles count as prefixes, too.
        $skip_beginnings = $special_cases['prefixes'];
        if ($delimiter === '-') {
            $skip_beginnings = \array_merge($special_cases['names'], $skip_beginnings);
        }

        // The parts are written back by index, so no reference is left behind after the loop.
        foreach ($name_helper_array as $index => $name) {
            if (\in_array($name, $special_cases['names'], true)) {
                continue;
            }

            foreach ($skip_beginnings as $beginning) {
                if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                    // keep this part untouched, there is no need to check the other beginnings
                    continue 2;
                }
            }

            $name_helper_array[$index] = self::ucfirst($name);
        }

        return \implode($delimiter, $name_helper_array);
    }
12623
12624
    /**
     * Generic case-sensitive transformation for collation matching.
     *
     * The string is normalized to NFD and all nonspacing marks ("\p{Mn}") are removed
     * afterwards, e.g. "é" becomes "e".
     *
     * @param string $str <p>The input string</p>
     *
     * @return string|null
     *                     <p>The result of "preg_replace()", which is null on a regex error.</p>
     */
    private static function strtonatfold(string $str)
    {
        /** @noinspection PhpUndefinedClassInspection */
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        return \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
12640
12641
    /**
     * Convert one character into its UTF-8 representation.
     *
     * If the ordinal value of the input is part of the Windows-1252 special cases table,
     * then the mapped value is used. Otherwise a two byte sequence is built from the
     * ordinal value: 0xC0 | (ord >> 6) followed by 0x80 | (byte & 0x3F).
     *
     * @param int|string $input <p>The key that is looked up in the "ord" table (presumably a single byte).</p>
     *
     * @return string
     */
    private static function to_utf8_convert_helper($input): string
    {
        // init
        $buf = '';

        // the lookup tables are loaded lazily
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // ">> 6" is the integer form of "/ 64": a float array key would be truncated
            // implicitly, which is deprecated since PHP 8.1.
            //
            // The "|" and "&" below are intentional: applied to two strings these
            // operators work on the bytes of the strings.
            /** @noinspection OffsetOperationsInspection */
            $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
            $cc2 = ((string) $input & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
12675
12676
    /**
     * Convert non-standard "%uXXXX" URL escapes into hexadecimal HTML entities ("&#xXXXX;").
     *
     * Escapes with three or four hex digits are converted, everything else is kept as it is.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    private static function urldecode_unicode_helper(string $str): string
    {
        $pattern = '/%u([0-9a-fA-F]{3,4})/';

        // Nothing to replace (or the match failed): return the input untouched.
        if (!\preg_match($pattern, $str)) {
            return $str;
        }

        return (string) \preg_replace($pattern, '&#x\\1;', $str);
    }
12690
}
12691