Passed
Push — master ( b73a3a...881454 )
by Lars
03:43
created

UTF8   F

Complexity

Total Complexity 1681

Size/Duplication

Total Lines 12659
Duplicated Lines 0 %

Test Coverage

Coverage 80.02%

Importance

Changes 90
Bugs 49 Features 4
Metric Value
eloc 4304
c 90
b 49
f 4
dl 0
loc 12659
ccs 3039
cts 3798
cp 0.8002
rs 0.8
wmc 1681

298 Methods

Rating   Name   Duplication   Size   Complexity  
A add_bom_to_string() 0 7 2
A __construct() 0 2 1
A access() 0 11 4
A str_substr_after_first_separator() 0 28 6
A str_surround() 0 3 1
A toUTF8() 0 3 1
A toLatin1() 0 3 1
A toIso8859() 0 3 1
A strip_whitespace() 0 7 2
A chr_to_decimal() 0 30 6
A file_has_bom() 0 8 2
A str_begins() 0 3 1
A max() 0 14 3
B str_camelize() 0 70 10
A parse_str() 0 16 4
A filter_input() 0 13 2
A str_contains() 0 10 2
B str_to_lines() 0 29 8
A substr_in_byte() 0 18 6
A array_change_key_case() 0 23 5
A get_unique_string() 0 15 2
A is_bom() 0 10 3
A is_hexadecimal() 0 8 2
A strnatcasecmp() 0 5 1
A encode_mimeheader() 0 25 5
A substr_left() 0 15 4
A count_chars() 0 11 1
D strlen() 0 99 19
A str_isubstr_last() 0 25 4
A ctype_loaded() 0 3 1
A str_replace_beginning() 0 24 6
A has_uppercase() 0 8 2
A remove_left() 0 24 4
B stripos() 0 59 11
A str_offset_exists() 0 10 2
D strrchr() 0 101 20
A to_filename() 0 9 1
A str_iends_with() 0 11 3
A max_chr_width() 0 8 2
A isBinary() 0 3 1
C utf8_decode() 0 60 13
A ltrim() 0 19 4
A emoji_decode() 0 18 2
A is_utf8() 0 13 4
A remove_html() 0 3 1
B str_longest_common_suffix() 0 54 10
C wordwrap() 0 68 14
B ucfirst() 0 57 7
A lcword() 0 8 1
A str_pad_both() 0 12 1
A str_index_last() 0 11 1
A str_substr_last() 0 33 6
A mbstring_loaded() 0 3 1
A str_limit() 0 26 6
D chr() 0 101 18
A html_escape() 0 6 1
A string() 0 10 1
C normalize_encoding() 0 134 14
B rxClass() 0 39 8
B get_file_type() 0 59 7
A str_ensure_right() 0 13 4
A chr_to_int() 0 3 1
B str_titleize_for_humans() 0 155 5
C is_utf16() 0 65 16
A isHtml() 0 3 1
C filter() 0 59 13
A normalize_whitespace() 0 9 1
A str_starts_with() 0 11 3
A isBase64() 0 3 1
A str_humanize() 0 15 1
A is_html() 0 14 2
A decode_mimeheader() 0 15 5
C substr_count_in_byte() 0 54 15
A html_decode() 0 6 1
A strchr() 0 13 1
A strichr() 0 13 1
A isUtf32() 0 3 1
A str_index_first() 0 11 1
A strlen_in_byte() 0 12 3
A str_ireplace_ending() 0 21 6
A rtrim() 0 19 4
C str_longest_common_substring() 0 76 16
A regex_replace() 0 20 3
A chunk_split() 0 3 1
A titlecase() 0 31 5
A getData() 0 6 1
A str_iindex_first() 0 11 1
B strtolower() 0 54 10
B urldecode() 0 37 8
A str_isubstr_before_first_separator() 0 19 5
B strrev() 0 43 10
A replace_all() 0 11 2
D substr_replace() 0 124 27
A removeBOM() 0 3 1
A strstr_in_byte() 0 15 4
A emoji_encode() 0 18 2
A str_matches_pattern() 0 3 1
A is_alpha() 0 8 2
C str_titleize() 0 69 12
A ws() 0 3 1
B get_random_string() 0 56 10
A str_replace_first() 0 20 2
A fix_utf8() 0 30 4
A str_pad_right() 0 12 1
B ucwords() 0 48 9
A first_char() 0 14 4
A to_boolean() 0 35 5
C stristr() 0 68 15
A isUtf8() 0 3 1
A strncasecmp() 0 10 1
B strwidth() 0 43 8
A str_iends() 0 3 1
A css_stripe_media_queries() 0 6 1
A trim() 0 19 4
A clean() 0 47 6
A is_serialized() 0 11 3
A str_upper_camelize() 0 8 1
A is_uppercase() 0 8 2
A substr_compare() 0 33 6
C substr_count() 0 62 16
A strnatcmp() 0 9 2
D str_pad() 0 146 16
A urldecode_unicode_helper() 0 8 2
A str_ireplace() 0 18 3
A to_latin1() 0 3 1
A str_replace_ending() 0 24 6
A string_has_bom() 0 10 3
B strtr() 0 34 8
A str_contains_all() 0 23 6
A is_ascii() 0 3 1
A normalize_line_ending() 0 3 1
A str_isubstr_after_last_separator() 0 26 5
D range() 0 65 23
B strspn() 0 30 10
A strcasecmp() 0 21 1
A str_transliterate() 0 6 1
B rawurldecode() 0 37 8
A str_ends() 0 3 1
B str_capitalize_name_helper() 0 79 10
A utf8_encode() 0 16 3
A normalize_msword() 0 3 1
C str_detect_encoding() 0 111 13
A spaces_to_tabs() 0 11 3
A str_istarts_with() 0 11 3
A is_blank() 0 8 2
A str_replace() 0 14 1
A substr_iright() 0 15 4
D getCharDirection() 0 105 118
A htmlspecialchars() 0 15 3
A replace() 0 11 2
A filter_var_array() 0 12 2
A decimal_to_chr() 0 3 1
A to_iso8859() 0 16 4
A words_limit() 0 20 5
A strip_tags() 0 18 4
A pcre_utf8_support() 0 4 1
B between() 0 48 8
A str_isubstr_before_last_separator() 0 24 6
D str_truncate_safe() 0 78 18
A codepoints() 0 29 4
A substr_right() 0 31 6
A lowerCaseFirst() 0 8 1
D str_split() 0 125 28
A str_ends_with_any() 0 13 4
A chr_map() 0 5 1
A strrpos_in_byte() 0 12 4
A cleanup() 0 25 2
F strrpos() 0 118 25
A remove_right() 0 25 4
A remove_html_breaks() 0 3 1
A showSupport() 0 8 2
A char_at() 0 7 2
A remove_invisible_characters() 0 9 1
A single_chr_html_encode() 0 18 4
A chars() 0 3 1
A str_replace_last() 0 19 2
A str_iindex_last() 0 11 1
A str_substr_before_last_separator() 0 31 6
B is_binary() 0 35 9
A intlChar_loaded() 0 3 1
B strtocasefold() 0 33 7
A lcfirst() 0 44 5
A tabs_to_spaces() 0 11 3
A finfo_loaded() 0 3 1
B str_truncate() 0 44 7
D strripos() 0 96 19
A strpos_in_byte() 0 12 4
A str_ends_with() 0 11 3
A fits_inside() 0 3 1
A to_ascii() 0 6 1
A is_binary_file() 0 16 3
A intl_loaded() 0 3 1
A reduce_string_array() 0 29 6
B str_longest_common_prefix() 0 51 8
A mbstring_overloaded() 0 11 2
A str_pad_left() 0 12 1
A str_substr_first() 0 33 6
A html_stripe_empty_tags() 0 6 1
A chr_size_list() 0 17 3
A remove_bom() 0 21 5
A str_repeat() 0 5 1
A strpbrk() 0 11 4
A whitespace_table() 0 3 1
A substr_count_simple() 0 31 6
A str_iends_with_any() 0 13 4
A str_substr_after_last_separator() 0 28 6
A str_isubstr_after_first_separator() 0 26 5
F extract_text() 0 175 34
A json_loaded() 0 3 1
A isBom() 0 3 1
B str_snakeize() 0 55 6
A int_to_chr() 0 3 1
A is_lowercase() 0 8 2
A str_sort() 0 15 3
D to_utf8() 0 117 35
A ucword() 0 6 1
A str_underscored() 0 3 1
A str_offset_get() 0 14 4
A hasBom() 0 3 1
A toAscii() 0 6 1
A str_ibegins() 0 3 1
A str_capitalize_name() 0 8 1
B str_limit_after_word() 0 55 11
A iconv_loaded() 0 3 1
A lcwords() 0 31 6
A str_upper_first() 0 13 1
A isAscii() 0 3 1
A normalizeEncoding() 0 3 1
A swapCase() 0 17 4
A filter_var() 0 12 2
A substr_ileft() 0 15 4
A is_empty() 0 3 1
B html_encode() 0 45 7
A str_dasherize() 0 3 1
A isUtf16() 0 3 1
A str_ensure_left() 0 11 3
F encode() 0 140 37
B urldecode_fix_win1252_chars() 0 227 1
C is_utf32() 0 65 16
C ord() 0 72 16
A is_alphanumeric() 0 8 2
A strtonatfold() 0 7 1
A json_decode() 0 14 2
A fix_simple_utf8() 0 19 4
C strcspn() 0 52 12
A checkForSupport() 0 47 4
B is_json() 0 29 8
A fixStrCaseHelper() 0 36 5
A int_to_hex() 0 7 2
B str_split_pattern() 0 49 11
D strstr() 0 92 18
A has_lowercase() 0 8 2
A json_encode() 0 10 2
A str_isubstr_first() 0 25 4
A is_base64() 0 20 5
A str_last_char() 0 16 4
A str_ireplace_beginning() 0 21 6
A hex_to_int() 0 14 3
A htmlentities() 0 28 3
A hex_to_chr() 0 3 1
A str_substr_before_first_separator() 0 32 6
F substr() 0 143 32
A isJson() 0 3 1
A wordwrap_per_line() 0 28 5
A strncmp() 0 19 4
A filter_input_array() 0 12 2
A str_insert() 0 28 4
A getSupportInfo() 0 13 3
A utf8_fix_win1252_chars() 0 3 1
A replace_diamond_question_mark() 0 38 5
A chr_to_hex() 0 11 3
D is_utf8_string() 0 134 28
A to_utf8_convert_helper() 0 28 5
B str_delimit() 0 33 8
B strtoupper() 0 54 10
A min() 0 14 3
A collapse_whitespace() 0 8 2
C html_entity_decode() 0 89 17
A str_starts_with_any() 0 17 5
B strrichr() 0 54 11
A split() 0 6 1
A str_istarts_with_any() 0 17 5
B str_contains_any() 0 29 8
A initEmojiData() 0 26 4
A remove_duplicates() 0 14 4
B str_slice() 0 33 10
F strpos() 0 131 27
A str_shuffle() 0 35 6
A strcmp() 0 9 2
B file_get_contents() 0 56 11
A str_word_count() 0 23 5
A strripos_in_byte() 0 12 4
A str_to_binary() 0 9 2
A callback() 0 3 1
A symfony_polyfill_used() 0 16 5
A binary_to_str() 0 12 3
A bom() 0 3 1
B str_to_words() 0 36 8

How to fix   Complexity   

Complex Class

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
final class UTF8
8
{
9
    /**
10
     * (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
11
     * This regular expression is a work around for http://bugs.exim.org/1279
12
     */
13
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
14
15
    /**
16
     * Bom => Byte-Length
17
     *
18
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
19
     *
20
     * @var array
21
     */
22
    private static $BOM = [
        // Each key is a byte sequence that marks a BOM, each value is the byte length of that key.
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        // The UTF-8 BOM bytes (EF BB BF) read as "WINDOWS-1252" and re-encoded as UTF-8 ("ï»¿").
        // Written with escapes so the key cannot get lost in transit; an empty-string key would never identify a BOM.
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        // NOTE(review): this key is written with two plain spaces; a WINDOWS-1252 reading of 00 00 FE FF
        // would presumably start with two NUL bytes instead - confirm against upstream.
        '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        // NOTE(review): same question as above for the two trailing spaces - confirm against upstream.
        'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
34
35
    /**
36
     * Numeric code point => UTF-8 Character
37
     *
38
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
39
     *
40
     * @var array
41
     */
42
    private static $WHITESPACE = [
43
        // NUL Byte
44
        0 => "\x0",
45
        // Tab
46
        9 => "\x9",
47
        // New Line
48
        10 => "\xa",
49
        // Vertical Tab
50
        11 => "\xb",
51
        // Carriage Return
52
        13 => "\xd",
53
        // Ordinary Space
54
        32 => "\x20",
55
        // NO-BREAK SPACE
56
        160 => "\xc2\xa0",
57
        // OGHAM SPACE MARK
58
        5760 => "\xe1\x9a\x80",
59
        // MONGOLIAN VOWEL SEPARATOR
60
        6158 => "\xe1\xa0\x8e",
61
        // EN QUAD
62
        8192 => "\xe2\x80\x80",
63
        // EM QUAD
64
        8193 => "\xe2\x80\x81",
65
        // EN SPACE
66
        8194 => "\xe2\x80\x82",
67
        // EM SPACE
68
        8195 => "\xe2\x80\x83",
69
        // THREE-PER-EM SPACE
70
        8196 => "\xe2\x80\x84",
71
        // FOUR-PER-EM SPACE
72
        8197 => "\xe2\x80\x85",
73
        // SIX-PER-EM SPACE
74
        8198 => "\xe2\x80\x86",
75
        // FIGURE SPACE
76
        8199 => "\xe2\x80\x87",
77
        // PUNCTUATION SPACE
78
        8200 => "\xe2\x80\x88",
79
        // THIN SPACE
80
        8201 => "\xe2\x80\x89",
81
        //HAIR SPACE
82
        8202 => "\xe2\x80\x8a",
83
        // LINE SEPARATOR
84
        8232 => "\xe2\x80\xa8",
85
        // PARAGRAPH SEPARATOR
86
        8233 => "\xe2\x80\xa9",
87
        // NARROW NO-BREAK SPACE
88
        8239 => "\xe2\x80\xaf",
89
        // MEDIUM MATHEMATICAL SPACE
90
        8287 => "\xe2\x81\x9f",
91
        // IDEOGRAPHIC SPACE
92
        12288 => "\xe3\x80\x80",
93
    ];
94
95
    /**
96
     * @var array
97
     */
98
    private static $WHITESPACE_TABLE = [
99
        'SPACE'                     => "\x20",
100
        'NO-BREAK SPACE'            => "\xc2\xa0",
101
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
102
        'EN QUAD'                   => "\xe2\x80\x80",
103
        'EM QUAD'                   => "\xe2\x80\x81",
104
        'EN SPACE'                  => "\xe2\x80\x82",
105
        'EM SPACE'                  => "\xe2\x80\x83",
106
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
107
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
108
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
109
        'FIGURE SPACE'              => "\xe2\x80\x87",
110
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
111
        'THIN SPACE'                => "\xe2\x80\x89",
112
        'HAIR SPACE'                => "\xe2\x80\x8a",
113
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
114
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
115
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
116
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
117
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
118
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
119
        'HALFWIDTH HANGUL FILLER'   => "\xef\xbe\xa0",
120
    ];
121
122
    /**
123
     * @var array{upper: string[], lower: string[]}
0 ignored issues
show
Documentation Bug introduced by
The doc comment array{upper at position 0 could not be parsed: Unknown type name 'array{upper' at position 0 in array{upper.
Loading history...
124
     */
125
    private static $COMMON_CASE_FOLD = [
126
        'upper' => [
127
            'µ',
128
            'ſ',
129
            "\xCD\x85",
130
            'ς',
131
            'ẞ',
132
            "\xCF\x90",
133
            "\xCF\x91",
134
            "\xCF\x95",
135
            "\xCF\x96",
136
            "\xCF\xB0",
137
            "\xCF\xB1",
138
            "\xCF\xB5",
139
            "\xE1\xBA\x9B",
140
            "\xE1\xBE\xBE",
141
        ],
142
        'lower' => [
143
            'μ',
144
            's',
145
            'ι',
146
            'σ',
147
            'ß',
148
            'β',
149
            'θ',
150
            'φ',
151
            'π',
152
            'κ',
153
            'ρ',
154
            'ε',
155
            "\xE1\xB9\xA1",
156
            'ι',
157
        ],
158
    ];
159
160
    /**
161
     * @var array
162
     */
163
    private static $SUPPORT = [];
164
165
    /**
166
     * @var array|null
167
     */
168
    private static $BROKEN_UTF8_FIX;
169
170
    /**
171
     * @var array|null
172
     */
173
    private static $WIN1252_TO_UTF8;
174
175
    /**
176
     * @var array|null
177
     */
178
    private static $INTL_TRANSLITERATOR_LIST;
179
180
    /**
181
     * @var array|null
182
     */
183
    private static $ENCODINGS;
184
185
    /**
186
     * @var array|null
187
     */
188
    private static $ORD;
189
190
    /**
191
     * @var array|null
192
     */
193
    private static $EMOJI;
194
195
    /**
196
     * @var array|null
197
     */
198
    private static $EMOJI_VALUES_CACHE;
199
200
    /**
201
     * @var array|null
202
     */
203
    private static $EMOJI_KEYS_CACHE;
204
205
    /**
206
     * @var array|null
207
     */
208
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
209
210
    /**
211
     * @var array|null
212
     */
213
    private static $CHR;
214
215
    /**
216
     * __construct()
217
     */
218 33
    public function __construct()
    {
        // Intentionally empty: there is nothing to initialise here.
    }
221
222
    /**
223
     * Return the character at the specified position: $str[1] like functionality.
224
     *
225
     * @param string $str      <p>A UTF-8 string.</p>
226
     * @param int    $pos      <p>The position of character to return.</p>
227
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
228
     *
229
     * @return string single multi-byte character
230
     */
231 3
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        // Nothing can be picked from an empty string or from a negative position.
        $nothing_to_pick = $str === '' || $pos < 0;
        if ($nothing_to_pick) {
            return '';
        }

        // The default encoding goes straight to mbstring, everything else
        // is delegated to the encoding-aware substr() of this class.
        $char = $encoding !== 'UTF-8'
            ? self::substr($str, $pos, 1, $encoding)
            : \mb_substr($str, $pos, 1);

        return (string) $char;
    }
243
244
    /**
245
     * Prepends UTF-8 BOM character to the string and returns the whole string.
246
     *
247
     * INFO: If BOM already existed there, the Input string is returned.
248
     *
249
     * @param string $str <p>The input string.</p>
250
     *
251
     * @return string the output string that contains BOM
252
     */
253 2
    public static function add_bom_to_string(string $str): string
    {
        // Prefix the UTF-8 BOM only when string_has_bom() reports that none is present yet.
        return self::string_has_bom($str) === false
            ? self::bom() . $str
            : $str;
    }
261
262
    /**
263
     * Changes all keys in an array.
264
     *
265
     * @param array  $array    <p>The array to work on</p>
266
     * @param int    $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
267
     *                         or <strong>CASE_LOWER</strong> (default)</p>
268
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
269
     *
270
     * @return string[]
271
     *                  <p>An array with its keys lower- or uppercased.</p>
272
     */
273 2
    public static function array_change_key_case(
        array $array,
        int $case = \CASE_LOWER,
        string $encoding = 'UTF-8'
    ): array {
        // Any flag other than CASE_LOWER / CASE_UPPER falls back to lower-casing.
        if ($case !== \CASE_LOWER && $case !== \CASE_UPPER) {
            $case = \CASE_LOWER;
        }

        $return = [];

        // Iterate by value: the values are only copied into the result, so a
        // by-reference loop variable is not needed and would stay bound to the
        // last element after the loop.
        foreach ($array as $key => $value) {
            $key = $case === \CASE_LOWER
                ? self::strtolower((string) $key, $encoding)
                : self::strtoupper((string) $key, $encoding);

            // Keys that become equal after case conversion overwrite each other; the last one wins.
            $return[$key] = $value;
        }

        return $return;
    }
297
298
    /**
299
     * Returns the substring between $start and $end, if found, or an empty
300
     * string. An optional offset may be supplied from which to begin the
301
     * search for the start string.
302
     *
303
     * @param string $str
304
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
305
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
306
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
307
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
308
     *
309
     * @return string
310
     */
311 16
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        // Non-default encodings: same algorithm, but routed through the
        // encoding-aware helpers of this class.
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $from = self::strpos($str, $start, $offset, $encoding);
            if ($from === false) {
                return '';
            }

            // Skip over the start delimiter itself.
            $from += (int) self::strlen($start, $encoding);

            $to = self::strpos($str, $end, $from, $encoding);
            if ($to === false || $to === $from) {
                return '';
            }

            return (string) self::substr($str, $from, $to - $from, $encoding);
        }

        // Default encoding: use mbstring directly.
        $from = \mb_strpos($str, $start, $offset);
        if ($from === false) {
            return '';
        }

        // Skip over the start delimiter itself.
        $from += (int) \mb_strlen($start);

        $to = \mb_strpos($str, $end, $from);
        if ($to === false || $to === $from) {
            // End delimiter missing, or nothing between the two delimiters.
            return '';
        }

        return (string) \mb_substr($str, $from, $to - $from);
    }
361
362
    /**
363
     * Convert binary into a string.
364
     *
365
     * @param mixed $bin 1|0
366
     *
367
     * @return string
368
     */
369 2
    public static function binary_to_str($bin): string
    {
        // Only values with a readable first offset are accepted; anything else yields ''.
        if (!isset($bin[0])) {
            return '';
        }

        // Keep only binary digits and drop leading zeros. An input that
        // carries no set bit at all (e.g. "0") yields an empty string.
        $bits = \ltrim((string) \preg_replace('/[^01]/', '', (string) $bin), '0');
        if ($bits === '') {
            return '';
        }

        // Left-pad to whole bytes so that the most significant byte keeps its
        // value (a lone "1" must become "\x01", not "\x10").
        $padded_length = (int) (\ceil(\strlen($bits) / 8) * 8);
        $bits = \str_pad($bits, $padded_length, '0', \STR_PAD_LEFT);

        // Convert byte by byte: this avoids the float precision limit that a
        // single base_convert() call hits on long inputs.
        $str = '';
        foreach (\str_split($bits, 8) as $byte) {
            $str .= \chr((int) \bindec($byte));
        }

        return $str;
    }
382
383
    /**
384
     * Returns the UTF-8 Byte Order Mark Character.
385
     *
386
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
387
     *
388
     * @return string UTF-8 Byte Order Mark
389
     */
390 4
    public static function bom(): string
    {
        // EF BB BF: the three bytes of the UTF-8 byte order mark.
        $utf8_bom = "\xef\xbb\xbf";

        return $utf8_bom;
    }
394
395
    /**
396
     * @alias of UTF8::chr_map()
397
     *
398
     * @param array|string $callback
399
     * @param string       $str
400
     *
401
     * @return string[]
402
     *
403
     * @see UTF8::chr_map()
404
     */
405 2
    public static function callback($callback, string $str): array
    {
        // Same work as chr_map(): split the string into characters and run
        // the callback over each of them.
        $chars = self::str_split($str);

        return \array_map($callback, $chars);
    }
409
410
    /**
411
     * Returns the character at $index, with indexes starting at 0.
412
     *
413
     * @param string $str      <p>The input string.</p>
414
     * @param int    $index    <p>Position of the character.</p>
415
     * @param string $encoding [optional] <p>Default is UTF-8</p>
416
     *
417
     * @return string the character at $index
418
     */
419 9
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Any non-default encoding is handled by the encoding-aware substr() of this class.
        if ($encoding !== 'UTF-8') {
            return (string) self::substr($str, $index, 1, $encoding);
        }

        // Default encoding: mbstring can answer directly.
        return (string) \mb_substr($str, $index, 1);
    }
427
428
    /**
429
     * Returns an array consisting of the characters in the string.
430
     *
431
     * @param string $str <p>The input string.</p>
432
     *
433
     * @return string[] an array of chars
434
     */
435 3
    public static function chars(string $str): array
    {
        // Thin wrapper: the character list is exactly what str_split() returns.
        $chars = self::str_split($str);

        return $chars;
    }
439
440
    /**
441
     * This method will auto-detect your server environment for UTF-8 support.
442
     *
443
     * @return true|null
444
     *
445
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
446
     */
447 5
    public static function checkForSupport()
    {
        // The detection runs only once; every later call is a no-op that returns null.
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        // Set the marker first, before any detector runs.
        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // mbstring - http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            // Switch mbstring (and its regex functions) to UTF-8 and remember that.
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // iconv - http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // intl - http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // IntlChar - http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // ctype - http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // finfo - http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // json - http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // pcre - http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        // When the polyfill is reported as used, the internal encoding is set to UTF-8 as well.
        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
495
496
    /**
497
     * Generates a UTF-8 encoded character from the given code point.
498
     *
499
     * INFO: opposite to UTF8::ord()
500
     *
501
     * @param int|string $code_point <p>The code point for which to generate a character.</p>
502
     * @param string     $encoding   [optional] <p>Default is UTF-8</p>
503
     *
504
     * @return string|null multi-byte character, returns null on failure or empty input
505
     */
506 25
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // Memo of already generated characters, keyed by "<code point><encoding>".
        static $CHAR_CACHE = [];

        // "UTF-8" and "CP850" are taken verbatim, every other name is normalized first.
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Warn when the target encoding is neither UTF-8, ISO-8859-1 nor
        // WINDOWS-1252 and mbstring is flagged as unavailable.
        if (
            !($encoding === 'UTF-8' || $encoding === 'ISO-8859-1' || $encoding === 'WINDOWS-1252')
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        $cache_key = $code_point . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        if ($code_point <= 127) {
            // use "simple"-char only until "\x80": plain table lookup
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];
        } elseif (self::$SUPPORT['intlChar'] === true) {
            // fallback via "IntlChar"
            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);
        } else {
            // fallback via vanilla php: assemble the UTF-8 bytes by hand from the byte table
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            $code_point = (int) $code_point;

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            if ($code_point <= 0x7F) {
                // 1 byte
                $chr = self::$CHR[$code_point];
            } elseif ($code_point <= 0x7FF) {
                // 2 bytes
                $chr = self::$CHR[($code_point >> 6) + 0xC0]
                       . self::$CHR[($code_point & 0x3F) + 0x80];
            } elseif ($code_point <= 0xFFFF) {
                // 3 bytes
                $chr = self::$CHR[($code_point >> 12) + 0xE0]
                       . self::$CHR[(($code_point >> 6) & 0x3F) + 0x80]
                       . self::$CHR[($code_point & 0x3F) + 0x80];
            } else {
                // 4 bytes
                $chr = self::$CHR[($code_point >> 18) + 0xF0]
                       . self::$CHR[(($code_point >> 12) & 0x3F) + 0x80]
                       . self::$CHR[(($code_point >> 6) & 0x3F) + 0x80]
                       . self::$CHR[($code_point & 0x3F) + 0x80];
            }
        }

        // Shared tail of all three strategies: re-encode if a non-UTF-8
        // target was requested, then memoize and return.
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cache_key] = $chr;
    }
608
609
    /**
610
     * Applies callback to all characters of a string.
611
     *
612
     * @param array|string $callback <p>The callback function.</p>
613
     * @param string       $str      <p>UTF-8 string to run callback on.</p>
614
     *
615
     * @return string[] the outcome of callback
616
     */
617 2
    public static function chr_map($callback, string $str): array
    {
        // Split into characters first, then hand every character to the callback.
        $chars = self::str_split($str);

        return \array_map($callback, $chars);
    }
624
625
    /**
626
     * Generates an array of byte length of each character of a Unicode string.
627
     *
628
     * 1 byte => U+0000  - U+007F
629
     * 2 byte => U+0080  - U+07FF
630
     * 3 byte => U+0800  - U+FFFF
631
     * 4 byte => U+10000 - U+10FFFF
632
     *
633
     * @param string $str <p>The original unicode string.</p>
634
     *
635
     * @return int[] an array of byte lengths of each character
636
     */
637 4
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        // Pick the byte counter: with the mbstring overload flag set, bytes are
        // counted via mb_strlen() in an 8-bit encoding, otherwise via strlen().
        $byte_length = self::$SUPPORT['mbstring_func_overload'] === true
            ? static function (string $data): int {
                // "mb_" is available if overload is used, so use it ...
                return \mb_strlen($data, 'CP850'); // 8-BIT
            }
            : '\strlen';

        // One entry per character, each holding that character's byte length.
        return \array_map($byte_length, self::str_split($str));
    }
655
656
    /**
657
     * Get a decimal code representation of a specific character.
658
     *
659
     * @param string $char <p>The input character.</p>
660
     *
661
     * @return int
662
     */
663 4
    public static function chr_to_decimal(string $char): int
    {
        $code = self::ord($char[0]);

        // 0xxxxxxx: high bit not set, the value is returned as-is.
        if (($code & 0x80) === 0) {
            return $code;
        }

        // Lead byte patterns as [mask, marker, sequence length]:
        //   110xxxxx => 2 bytes, 1110xxxx => 3 bytes, 11110xxx => 4 bytes
        $lead_patterns = [
            [0xe0, 0xc0, 2],
            [0xf0, 0xe0, 3],
            [0xf8, 0xf0, 4],
        ];

        $length = 1;
        foreach ($lead_patterns as $pattern) {
            list($mask, $marker, $sequence_length) = $pattern;

            if (($code & $mask) === $marker) {
                $length = $sequence_length;
                // Strip the marker bits, keep the payload bits.
                $code &= ~$marker;

                break;
            }
        }

        // Every following byte (10xxxxxx) contributes six more bits.
        for ($index = 1; $index < $length; ++$index) {
            $code = ($code << 6) + (self::ord($char[$index]) & ~0x80);
        }

        return $code;
    }
694
695
    /**
696
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
697
     *
698
     * @param int|string $char   <p>The input character</p>
699
     * @param string     $prefix [optional]
700
     *
701
     * @return string The code point encoded as U+xxxx
702
     */
703 2
    public static function chr_to_hex($char, string $prefix = 'U+'): string
    {
        // No character, no code point.
        if ($char === '') {
            return '';
        }

        // The HTML entity for NUL is passed on to ord() as an empty string;
        // everything else is cast to string first.
        $input = $char === '&#0;' ? '' : (string) $char;

        $code_point = self::ord($input);

        return self::int_to_hex($code_point, $prefix);
    }
715
716
    /**
717
     * alias for "UTF8::chr_to_decimal()"
718
     *
719
     * @param string $chr
720
     *
721
     * @return int
722
     *
723
     * @see UTF8::chr_to_decimal()
724
     */
725 2
    public static function chr_to_int(string $chr): int
    {
        // Pure alias: the work is done by chr_to_decimal().
        $code_point = self::chr_to_decimal($chr);

        return $code_point;
    }
729
730
    /**
731
     * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
732
     *
733
     * @param string $body         <p>The original string to be split.</p>
734
     * @param int    $chunk_length [optional] <p>The maximum character length of a chunk.</p>
735
     * @param string $end          [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
736
     *
737
     * @return string the chunked string
738
     */
739 4
    public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
    {
        // Cut the text into pieces of at most $chunk_length characters ...
        $chunks = self::str_split($body, $chunk_length);

        // ... and glue them back together with $end between the pieces.
        return \implode($end, $chunks);
    }
743
744
    /**
745
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
746
     *
747
     * @param string $str                           <p>The string to be sanitized.</p>
748
     * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
749
     * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
750
     *                                              whitespace.</p>
751
     * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
752
     *                                              e.g.: "…"
753
     *                                              => "..."</p>
754
     * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
755
     *                                              combination with
756
     *                                              $normalize_whitespace</p>
757
     * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
758
     *                                              mark e.g.: "�"</p>
759
     * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove invisible
760
     *                                              characters e.g.: "\0"</p>
761
     *
762
     * @return string clean UTF-8 encoded string
763
     */
764 87
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // Group 1 captures runs of well-formed byte sequences, groups 2 and 3
        // match a single stray byte. Replacing every match with "$1" keeps the
        // well-formed runs and drops the stray bytes.
        $utf8_filter = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        /** @noinspection NotOptimalRegularExpressionsInspection */
        $cleaned = (string) \preg_replace($utf8_filter, '$1', $str);

        // The optional steps below run in a fixed order.
        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned, '');
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        if ($remove_bom) {
            $cleaned = self::remove_bom($cleaned);
        }

        return $cleaned;
    }
812
813
    /**
814
     * Clean-up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
815
     *
816
     * @param string $str <p>The input string.</p>
817
     *
818
     * @return string
819
     */
820 33
    public static function cleanup($str): string
    {
        // Accept anything that can be cast to a string.
        $input = (string) $str;

        if ($input === '') {
            return '';
        }

        // First pass: fix_simple_utf8() (original note: "fixed ISO <-> UTF-8 Errors").
        $repaired = self::fix_simple_utf8($input);

        // Second pass: clean() with the flags listed below.
        return self::clean(
            $repaired,
            true,  // $remove_bom
            true,  // $normalize_whitespace
            false, // $normalize_msword
            true,  // $keep_non_breaking_space
            true,  // $replace_diamond_question_mark
            true   // $remove_invisible_characters (e.g. "\0")
        );
    }
847
848
    /**
849
     * Accepts a string or an array of strings and returns an array of Unicode code points.
850
     *
851
     * INFO: opposite to UTF8::string()
852
     *
853
     * @param string|string[] $arg     <p>A UTF-8 encoded string or an array of such strings.</p>
854
     * @param bool            $u_style <p>If True, will return code points in U+xxxx format,
855
     *                                 default, code points will be returned as integers.</p>
856
     *
857
     * @return array<int|string>
858
     *                           The array of code points:<br>
859
     *                           array<int> for $u_style === false<br>
860
     *                           array<string> for $u_style === true<br>
861
     */
862 12
    public static function codepoints($arg, bool $u_style = false): array
    {
        // A plain string is split into characters; anything else is used as given.
        $chars = \is_string($arg) ? self::str_split($arg) : $arg;

        // Map every entry to its code point via ord().
        $code_points = \array_map([self::class, 'ord'], $chars);

        if ($code_points === []) {
            return [];
        }

        // Integer output is the default ...
        if (!$u_style) {
            return $code_points;
        }

        // ... otherwise every code point is passed through int_to_hex().
        return \array_map([self::class, 'int_to_hex'], $code_points);
    }
892
893
    /**
894
     * Trims the string and replaces consecutive whitespace characters with a
895
     * single space. This includes tabs and newline characters, as well as
896
     * multibyte whitespace such as the thin space and ideographic space.
897
     *
898
     * @param string $str <p>The input string.</p>
899
     *
900
     * @return string string with a trimmed $str and condensed whitespace
901
     */
902 13
    public static function collapse_whitespace(string $str): string
    {
        // Without mbstring the class' own regex helper does the replacement.
        if (self::$SUPPORT['mbstring'] !== true) {
            $collapsed = self::regex_replace($str, '[[:space:]]+', ' ');

            return \trim($collapsed);
        }

        // Every run of whitespace becomes one plain space, then the ends are trimmed.
        /** @noinspection PhpComposerExtensionStubsInspection */
        $collapsed = (string) \mb_ereg_replace('[[:space:]]+', ' ', $str);

        return \trim($collapsed);
    }
911
912
    /**
913
     * Returns count of characters used in a string.
914
     *
915
     * @param string $str                     <p>The input string.</p>
916
     * @param bool   $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
917
     * @param bool   $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use "mb_" functions.</p>
918
     *
919
     * @return int[] an associative array of Character as keys and
920
     *               their count as values
921
     */
922 19
    public static function count_chars(
        string $str,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // One array entry per character (chunk length 1) ...
        $chars = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

        // ... then count how often each character occurs.
        return \array_count_values($chars);
    }
936
937
    /**
938
     * Remove css media-queries.
939
     *
940
     * @param string $str
941
     *
942
     * @return string
943
     */
944 1
    public static function css_stripe_media_queries(string $str): string
    {
        // Matches an "@media ... { ... }" block including its closing brace.
        $media_query_pattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $without_media_queries = \preg_replace($media_query_pattern, '', $str);

        // preg_replace() yields null on failure; the cast turns that into ''.
        return (string) $without_media_queries;
    }
952
953
    /**
954
     * Checks whether ctype is available on the server.
955
     *
956
     * @return bool
957
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
958
     */
959
    public static function ctype_loaded(): bool
    {
        // Ask PHP directly whether the "ctype" extension is loaded.
        $is_loaded = \extension_loaded('ctype');

        return $is_loaded;
    }
963
964
    /**
965
     * Converts an int-value into a UTF-8 character.
966
     *
967
     * @param mixed $int
968
     *
969
     * @return string
970
     */
971 19
    public static function decimal_to_chr($int): string
    {
        // Build the numeric HTML entity ("&#<int>;") for the value ...
        $entity = '&#' . $int . ';';

        // ... and let the entity decoder turn it into the character.
        return self::html_entity_decode($entity, \ENT_QUOTES | \ENT_HTML5);
    }
975
976
    /**
977
     * Decodes a MIME header field
978
     *
979
     * @param string $str
980
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
981
     *
982
     * @return false|string
983
     *                      A decoded MIME field on success,
984
     *                      or false if an error occurs during the decoding
985
     */
986
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        // "UTF-8" and "CP850" are used as-is; any other name is normalized first.
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (self::$SUPPORT['iconv'] !== true) {
            // Fallback without iconv: the input is re-encoded (unless the target
            // is UTF-8) and then handed to mb_decode_mimeheader().
            // NOTE(review): the re-encoding happens before the MIME decoding,
            // not after it - confirm that this order is intended.
            $input = $encoding === 'UTF-8' ? $str : self::encode($encoding, $str);

            return \mb_decode_mimeheader($input);
        }

        // Preferred path: iconv decodes directly into the requested charset
        // and keeps going on malformed parts.
        return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
    }
1002
1003
    /**
1004
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
1005
     *
1006
     * @param string $str                            <p>The input string.</p>
1007
     * @param bool   $use_reversible_string_mappings [optional] <p>
1008
     *                                               When <b>TRUE</b>, we use a reversible string mapping
1009
     *                                               between "emoji_encode" and "emoji_decode".</p>
1010
     *
1011
     * @return string
1012
     */
1013 9
    public static function emoji_decode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        // Make sure the emoji lookup caches are filled.
        self::initEmojiData();

        // Pick the key list that matches the mapping used while encoding.
        $encoded_keys = $use_reversible_string_mappings
            ? (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : (array) self::$EMOJI_KEYS_CACHE;

        $emoji_values = (array) self::$EMOJI_VALUES_CACHE;

        // Swap every key back to its value.
        return (string) \str_replace($encoded_keys, $emoji_values, $str);
    }
1033
1034
    /**
1035
     * Encode a string with emoji chars into a non-emoji string.
1036
     *
1037
     * @param string $str                            <p>The input string</p>
1038
     * @param bool   $use_reversible_string_mappings [optional] <p>
1039
     *                                               when <b>TRUE</b>, we use a reversible string mapping
1040
     *                                               between "emoji_encode" and "emoji_decode"</p>
1041
     *
1042
     * @return string
1043
     */
1044 12
    public static function emoji_encode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        // Make sure the emoji lookup caches are filled.
        self::initEmojiData();

        $emoji_values = (array) self::$EMOJI_VALUES_CACHE;

        // Choose the replacement keys: the reversible set or the default set.
        $encoded_keys = $use_reversible_string_mappings
            ? (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : (array) self::$EMOJI_KEYS_CACHE;

        // Swap every value for its key.
        return (string) \str_replace($emoji_values, $encoded_keys, $str);
    }
1064
1065
    /**
1066
     * Encode a string with a new charset-encoding.
1067
     *
1068
     * INFO:  This function will also try to fix broken / double encoding,
1069
     *        so you can call this function also on a UTF-8 string and you don't mess the string.
1070
     *
1071
     * @param string $to_encoding                   <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
1072
     * @param string $str                           <p>The input string</p>
1073
     * @param bool   $auto_detect_the_from_encoding [optional] <p>Force the new encoding (we try to fix broken / double
1074
     *                                              encoding for UTF-8)<br> otherwise we auto-detect the current
1075
     *                                              string-encoding</p>
1076
     * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
1077
     *                                              A empty string will trigger the autodetect anyway.</p>
1078
     *
1079
     * @return string
1080
     *
1081
     * @psalm-suppress InvalidReturnStatement
1082
     */
1083 28
    public static function encode(
        string $to_encoding,
        string $str,
        bool $auto_detect_the_from_encoding = true,
        string $from_encoding = ''
    ): string {
        // Nothing to convert, or no target given: hand the input back untouched.
        if ($str === '' || $to_encoding === '') {
            return $str;
        }

        // "UTF-8" and "CP850" are used as-is; any other name is normalized first.
        if (!\in_array($to_encoding, ['UTF-8', 'CP850'], true)) {
            $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
        }

        if ($from_encoding && !\in_array($from_encoding, ['UTF-8', 'CP850'], true)) {
            $from_encoding = self::normalize_encoding($from_encoding, null);
        }

        // Source and target are explicitly the same: nothing to do.
        if ($from_encoding && $from_encoding === $to_encoding) {
            return $str;
        }

        // --- pseudo encodings: JSON, BASE64, HTML-ENTITIES -----------------
        // A pseudo target returns immediately; a pseudo source is decoded and
        // the source encoding is reset so that it gets detected below.

        if ($to_encoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }
        if ($from_encoding === 'JSON') {
            $str = self::json_decode($str);
            $from_encoding = '';
        }

        if ($to_encoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($from_encoding === 'BASE64') {
            $str = \base64_decode($str, true);
            $from_encoding = '';
        }

        if ($to_encoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true, 'UTF-8');
        }
        if ($from_encoding === 'HTML-ENTITIES') {
            $str = self::html_decode($str, \ENT_COMPAT, 'UTF-8');
            $from_encoding = '';
        }

        // --- find out the source encoding ----------------------------------
        // Detection runs when it was requested or when no source is known.
        $detected_encoding = ($auto_detect_the_from_encoding || !$from_encoding)
            ? self::str_detect_encoding($str)
            : false;

        // DEBUG
        //var_dump($to_encoding, $from_encoding, $detected_encoding, $str, "\n\n");

        if ($detected_encoding !== false) {
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $from_encoding = $detected_encoding;
        } elseif ($auto_detect_the_from_encoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        // Still no source encoding, or it already matches the target.
        if (!$from_encoding || $from_encoding === $to_encoding) {
            return $str;
        }

        // --- shortcuts handled by this class itself ------------------------
        if (
            $to_encoding === 'UTF-8'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'ISO-8859-1'], true)
        ) {
            return self::to_utf8($str);
        }

        if (
            $to_encoding === 'ISO-8859-1'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'UTF-8'], true)
        ) {
            return self::to_iso8859($str);
        }

        // --- generic conversion --------------------------------------------
        // Warn when the target is none of the three encodings named here and
        // mbstring is not available.
        if (
            !\in_array($to_encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // warning: do not use the symfony polyfill here
            $converted = \mb_convert_encoding($str, $to_encoding, $from_encoding);

            // A falsy result falls through to the iconv attempt below.
            if ($converted) {
                return $converted;
            }
        }

        $converted_by_iconv = \iconv($from_encoding, $to_encoding, $str);
        if ($converted_by_iconv !== false) {
            return $converted_by_iconv;
        }

        // Every conversion failed: return the (possibly pre-decoded) input.
        return $str;
    }
1224
1225
    /**
1226
     * @param string $str
1227
     * @param string $from_charset      [optional] <p>Set the input charset.</p>
1228
     * @param string $to_charset        [optional] <p>Set the output charset.</p>
1229
     * @param string $transfer_encoding [optional] <p>Set the transfer encoding.</p>
1230
     * @param string $linefeed          [optional] <p>Set the used linefeed.</p>
1231
     * @param int    $indent            [optional] <p>Set the maximum line length ("line-length" option).</p>
1232
     *
1233
     * @return false|string
1234
     *                      <p>An encoded MIME field on success,
1235
     *                      or false if an error occurs during the encoding.</p>
1236
     */
1237
    public static function encode_mimeheader(
        $str,
        $from_charset = 'UTF-8',
        $to_charset = 'UTF-8',
        $transfer_encoding = 'Q',
        $linefeed = "\r\n",
        $indent = 76
    ) {
        // The default line break has to be a real CR+LF sequence; the former
        // single-quoted default was the four literal characters \ r \ n.

        // "UTF-8" and "CP850" are used as-is; any other charset name is normalized first.
        if (!\in_array($from_charset, ['UTF-8', 'CP850'], true)) {
            $from_charset = self::normalize_encoding($from_charset, 'UTF-8');
        }

        if (!\in_array($to_charset, ['UTF-8', 'CP850'], true)) {
            $to_charset = self::normalize_encoding($to_charset, 'UTF-8');
        }

        // Map the arguments onto the option names of iconv_mime_encode().
        $options = [
            'scheme'           => $transfer_encoding,
            'line-length'      => $indent,
            'input-charset'    => $from_charset,
            'output-charset'   => $to_charset,
            'line-break-chars' => $linefeed,
        ];

        // An empty field name is passed, only the field value is supplied.
        // Returns the encoded field, or false if the encoding fails.
        return \iconv_mime_encode('', $str, $options);
    }
1265
1266
    /**
1267
     * Create an extract from a sentence, so if the search-string was found, it try to centered in the output.
1268
     *
1269
     * @param string   $str                       <p>The input string.</p>
1270
     * @param string   $search                    <p>The searched string.</p>
1271
     * @param int|null $length                    [optional] <p>Default: null === text->length / 2</p>
1272
     * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
1273
     * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
1274
     *
1275
     * @return string
1276
     */
1277 1
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacer_for_skipped_text = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // "UTF-8" and "CP850" are used as-is; any other name is normalized first.
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // For UTF-8 the native "mb_*" functions are called without an encoding
        // argument; every other encoding goes through the helpers of this class.
        $is_utf8 = $encoding === 'UTF-8';

        // Characters stripped from the cut edges of the extract.
        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        // Default length: half of the text.
        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2, 0);
        }

        // ------------------------------------------------------------------
        // No search term: cut the text at the first space / dot found from
        // position ($length - 1) onwards.
        // ------------------------------------------------------------------
        if ($search === '') {
            if ($length > 0) {
                $total_length = $is_utf8
                    ? (int) \mb_strlen($str)
                    : (int) self::strlen($str, $encoding);
                $end = \min($length - 1, $total_length);
            } else {
                $end = 0;
            }

            // NOTE(review): if either lookup returns false, min() looks like it
            // yields false and the cast gives 0 ("no cut") - confirm intended.
            if ($is_utf8) {
                $pos = (int) \min(
                    \mb_strpos($str, ' ', $end),
                    \mb_strpos($str, '.', $end)
                );
            } else {
                $pos = (int) \min(
                    self::strpos($str, ' ', $end, $encoding),
                    self::strpos($str, '.', $end, $encoding)
                );
            }

            if (!$pos) {
                return $str;
            }

            $head = $is_utf8
                ? \mb_substr($str, 0, $pos)
                : self::substr($str, 0, $pos, $encoding);

            if ($head === false) {
                return '';
            }

            return \rtrim($head, $trim_chars) . $replacer_for_skipped_text;
        }

        // ------------------------------------------------------------------
        // With a search term: try to center the extract around the first
        // (case-insensitive) match.
        // ------------------------------------------------------------------
        if ($is_utf8) {
            $word_position = (int) \mb_stripos($str, $search);
            $search_length = (int) \mb_strlen($search);
        } else {
            $word_position = (int) self::stripos($str, $search, 0, $encoding);
            $search_length = (int) self::strlen($search, $encoding);
        }
        $half_side = (int) ($word_position - $length / 2 + $search_length / 2);

        // Start at the last space / dot in front of the centered window, if any.
        $pos_start = 0;
        if ($half_side > 0) {
            $half_text = $is_utf8
                ? \mb_substr($str, 0, $half_side)
                : self::substr($str, 0, $half_side, $encoding);

            if ($half_text !== false) {
                if ($is_utf8) {
                    $pos_start = (int) \max(
                        \mb_strrpos($half_text, ' '),
                        \mb_strrpos($half_text, '.')
                    );
                } else {
                    $pos_start = (int) \max(
                        self::strrpos($half_text, ' ', 0, $encoding),
                        self::strrpos($half_text, '.', 0, $encoding)
                    );
                }
            }
        }

        if ($word_position && $half_side > 0) {
            // The match is far enough into the text: cut on the left side and,
            // when an end position is found, on the right side too.
            $offset = \min($pos_start + $length - 1, (int) self::strlen($str, $encoding));

            if ($is_utf8) {
                $pos_end = (int) \min(
                    \mb_strpos($str, ' ', $offset),
                    \mb_strpos($str, '.', $offset)
                ) - $pos_start;
            } else {
                $pos_end = (int) \min(
                    self::strpos($str, ' ', $offset, $encoding),
                    self::strpos($str, '.', $offset, $encoding)
                ) - $pos_start;
            }

            if ($pos_end <= 0) {
                // No usable end position: keep everything from the start position on.
                $tail = $is_utf8
                    ? \mb_substr($str, $pos_start, (int) \mb_strlen($str))
                    : self::substr($str, $pos_start, (int) self::strlen($str, $encoding), $encoding);

                if ($tail === false) {
                    return '';
                }

                return $replacer_for_skipped_text . \ltrim($tail, $trim_chars);
            }

            $middle = $is_utf8
                ? \mb_substr($str, $pos_start, $pos_end)
                : self::substr($str, $pos_start, $pos_end, $encoding);

            if ($middle === false) {
                return '';
            }

            return $replacer_for_skipped_text . \trim($middle, $trim_chars) . $replacer_for_skipped_text;
        }

        // The match sits at the very beginning (or was not found): keep the
        // head of the text and cut only on the right side.
        $offset = \min($length - 1, (int) self::strlen($str, $encoding));

        if ($is_utf8) {
            $pos_end = (int) \min(
                \mb_strpos($str, ' ', $offset),
                \mb_strpos($str, '.', $offset)
            );
        } else {
            $pos_end = (int) \min(
                self::strpos($str, ' ', $offset, $encoding),
                self::strpos($str, '.', $offset, $encoding)
            );
        }

        if (!$pos_end) {
            return $str;
        }

        $head = $is_utf8
            ? \mb_substr($str, 0, $pos_end)
            : self::substr($str, 0, $pos_end, $encoding);

        if ($head === false) {
            return '';
        }

        return \rtrim($head, $trim_chars) . $replacer_for_skipped_text;
    }
1453
1454
    /**
1455
     * Reads entire file into a string.
1456
     *
1457
     * WARNING: do not use UTF-8 Option ($convert_to_utf8) for binary-files (e.g.: images) !!!
1458
     *
1459
     * @see http://php.net/manual/en/function.file-get-contents.php
1460
     *
1461
     * @param string        $filename         <p>
1462
     *                                        Name of the file to read.
1463
     *                                        </p>
1464
     * @param bool          $use_include_path [optional] <p>
1465
     *                                        Prior to PHP 5, this parameter is called
1466
     *                                        use_include_path and is a bool.
1467
     *                                        As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1468
     *                                        to trigger include path
1469
     *                                        search.
1470
     *                                        </p>
1471
     * @param resource|null $context          [optional] <p>
1472
     *                                        A valid context resource created with
1473
     *                                        stream_context_create. If you don't need to use a
1474
     *                                        custom context, you can skip this parameter by &null;.
1475
     *                                        </p>
1476
     * @param int|null      $offset           [optional] <p>
1477
     *                                        The offset where the reading starts.
1478
     *                                        </p>
1479
     * @param int|null      $max_length       [optional] <p>
1480
     *                                        Maximum length of data read. The default is to read until end
1481
     *                                        of file is reached.
1482
     *                                        </p>
1483
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
1484
     * @param bool          $convert_to_utf8  <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
1485
     *                                        some files, because they used non default utf-8 chars. Binary files
1486
     *                                        like images or pdf will not be converted.</p>
1487
     * @param string        $from_encoding    [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
1488
     *                                        An empty string will trigger the autodetect anyway.</p>
1489
     *
1490
     * @return false|string
1491
     *                      <p>The function returns the read data as string or <b>false</b> on failure.</p>
1492
     */
1493 12
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $max_length = null,
        int $timeout = 10,
        bool $convert_to_utf8 = true,
        string $from_encoding = ''
    ) {
        // Sanitize the given path before it is handed to the native function.
        // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 and
        // rewrites tags / quotes, so such filenames will not resolve unchanged.
        $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);
        /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
        if ($filename === false) {
            return false;
        }

        // A negative length can never be satisfied: PHP >= 8.0 throws a
        // \ValueError for it (older versions warn and return false), so report
        // the documented failure value instead of calling the native function.
        if ($max_length !== null && $max_length < 0) {
            return false;
        }

        // Only build a default stream context (carrying the HTTP timeout)
        // when the caller did not supply one.
        if ($timeout && $context === null) {
            $context = \stream_context_create(
                [
                    'http' => [
                        'timeout' => $timeout,
                    ],
                ]
            );
        }

        if ($offset === null) {
            $offset = 0;
        }

        // The length argument is only forwarded when it was actually given,
        // otherwise the native function reads until the end of the file.
        if ($max_length === null) {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset);
        } else {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset, $max_length);
        }

        // return false on error
        if ($data === false) {
            return false;
        }

        if ($convert_to_utf8 === true) {
            // Convert everything that is not detected as binary, plus data
            // that is detected as UTF-16 / UTF-32.
            if (
                self::is_binary($data, true) !== true
                ||
                self::is_utf16($data, false) !== false
                ||
                self::is_utf32($data, false) !== false
            ) {
                $data = self::encode('UTF-8', $data, false, $from_encoding);
                $data = self::cleanup($data);
            }
        }

        return $data;
    }
1550
1551
    /**
1552
     * Checks if a file starts with BOM (Byte Order Mark) character.
1553
     *
1554
     * @param string $file_path <p>Path to a valid file.</p>
1555
     *
1556
     * @throws \RuntimeException if file_get_contents() returned false
1557
     *
1558
     * @return bool
1559
     *              <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p>
1560
     */
1561 2
    public static function file_has_bom(string $file_path): bool
    {
        // Read the raw bytes with the native function (no conversion applied).
        $raw_bytes = \file_get_contents($file_path);

        if ($raw_bytes !== false) {
            return self::string_has_bom($raw_bytes);
        }

        // The file could not be read at all.
        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1570
1571
    /**
1572
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1573
     *
1574
     * @param mixed  $var
1575
     * @param int    $normalization_form
1576
     * @param string $leading_combining
1577
     *
1578
     * @return mixed
1579
     */
1580 62
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        $var_type = \gettype($var);

        // Containers: filter every element / iterable property recursively, in place.
        if ($var_type === 'array' || $var_type === 'object') {
            /** @noinspection ForeachSourceInspection */
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            unset($item);

            return $var;
        }

        // Everything that is neither a container nor a string is passed through untouched.
        if ($var_type !== 'string') {
            return $var;
        }

        if (\strpos($var, "\r") !== false) {
            // Workaround https://bugs.php.net/65732
            $var = self::normalize_line_ending($var);
        }

        // Pure ASCII needs neither normalization nor a combining-char check.
        if (ASCII::is_ascii($var) !== false) {
            return $var;
        }

        if (\Normalizer::isNormalized($var, $normalization_form)) {
            // Placeholder: marks "already normalized" for the isset() check below.
            $normalized = '-';
        } else {
            $normalized = \Normalizer::normalize($var, $normalization_form);

            // An unusable normalization result falls back to a forced UTF-8 encode.
            $var = isset($normalized[0])
                ? $normalized
                : self::encode('UTF-8', $var, true);
        }

        $starts_with_combining_mark = $var[0] >= "\x80"
            && isset($normalized[0], $leading_combining[0])
            && \preg_match('/^\\p{Mn}/u', $var);

        if ($starts_with_combining_mark) {
            // Prevent leading combining chars
            // for NFC-safe concatenations.
            $var = $leading_combining . $var;
        }

        return $var;
    }
1640
1641
    /**
1642
     * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1643
     *
1644
     * Gets a specific external variable by name and optionally filters it
1645
     *
1646
     * @see http://php.net/manual/en/function.filter-input.php
1647
     *
1648
     * @param int    $type          <p>
1649
     *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1650
     *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1651
     *                              <b>INPUT_ENV</b>.
1652
     *                              </p>
1653
     * @param string $variable_name <p>
1654
     *                              Name of a variable to get.
1655
     *                              </p>
1656
     * @param int    $filter        [optional] <p>
1657
     *                              The ID of the filter to apply. The
1658
     *                              manual page lists the available filters.
1659
     *                              </p>
1660
     * @param mixed  $options       [optional] <p>
1661
     *                              Associative array of options or bitwise disjunction of flags. If filter
1662
     *                              accepts options, flags can be provided in "flags" field of array.
1663
     *                              </p>
1664
     *
1665
     * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
1666
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1667
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1668
     */
1669
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // Forward $options only when the caller really passed a 4th argument.
        $raw_value = \func_num_args() >= 4
            ? \filter_input($type, $variable_name, $filter, $options)
            : \filter_input($type, $variable_name, $filter);

        return self::filter($raw_value);
    }
1683
1684
    /**
1685
     * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1686
     *
1687
     * Gets external variables and optionally filters them
1688
     *
1689
     * @see http://php.net/manual/en/function.filter-input-array.php
1690
     *
1691
     * @param int   $type       <p>
1692
     *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1693
     *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1694
     *                          <b>INPUT_ENV</b>.
1695
     *                          </p>
1696
     * @param mixed $definition [optional] <p>
1697
     *                          An array defining the arguments. A valid key is a string
1698
     *                          containing a variable name and a valid value is either a filter type, or an array
1699
     *                          optionally specifying the filter, flags and options. If the value is an
1700
     *                          array, valid keys are filter which specifies the
1701
     *                          filter type,
1702
     *                          flags which specifies any flags that apply to the
1703
     *                          filter, and options which specifies any options that
1704
     *                          apply to the filter. See the example below for a better understanding.
1705
     *                          </p>
1706
     *                          <p>
1707
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1708
     *                          input array are filtered by this filter.
1709
     *                          </p>
1710
     * @param bool  $add_empty  [optional] <p>
1711
     *                          Add missing keys as <b>NULL</b> to the return value.
1712
     *                          </p>
1713
     *
1714
     * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1715
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1716
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
1717
     *               is not set and <b>NULL</b> if the filter fails.
1718
     */
1719
    public static function filter_input_array(
        int $type,
        $definition = null,
        bool $add_empty = true
    ) {
        // Called with the input type only: let the native function use its own defaults.
        if (\func_num_args() < 2) {
            return self::filter(\filter_input_array($type));
        }

        $filtered_input = \filter_input_array($type, $definition, $add_empty);

        return self::filter($filtered_input);
    }
1732
1733
    /**
1734
     * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1735
     *
1736
     * Filters a variable with a specified filter
1737
     *
1738
     * @see http://php.net/manual/en/function.filter-var.php
1739
     *
1740
     * @param mixed $variable <p>
1741
     *                        Value to filter.
1742
     *                        </p>
1743
     * @param int   $filter   [optional] <p>
1744
     *                        The ID of the filter to apply. The
1745
     *                        manual page lists the available filters.
1746
     *                        </p>
1747
     * @param mixed $options  [optional] <p>
1748
     *                        Associative array of options or bitwise disjunction of flags. If filter
1749
     *                        accepts options, flags can be provided in "flags" field of array. For
1750
     *                        the "callback" filter, callable type should be passed. The
1751
     *                        callback must accept one argument, the value to be filtered, and return
1752
     *                        the value after filtering/sanitizing it.
1753
     *                        </p>
1754
     *                        <p>
1755
     *                        <code>
1756
     *                        // for filters that accept options, use this format
1757
     *                        $options = array(
1758
     *                        'options' => array(
1759
     *                        'default' => 3, // value to return if the filter fails
1760
     *                        // other options here
1761
     *                        'min_range' => 0
1762
     *                        ),
1763
     *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1764
     *                        );
1765
     *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1766
     *                        // for filter that only accept flags, you can pass them directly
1767
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1768
     *                        // for filter that only accept flags, you can also pass as an array
1769
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1770
     *                        array('flags' => FILTER_NULL_ON_FAILURE));
1771
     *                        // callback validate filter
1772
     *                        function foo($value)
1773
     *                        {
1774
     *                        // Expected format: Surname, GivenNames
1775
     *                        if (strpos($value, ", ") === false) return false;
1776
     *                        list($surname, $givennames) = explode(", ", $value, 2);
1777
     *                        $empty = (empty($surname) || empty($givennames));
1778
     *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1779
     *                        if ($empty || $notstrings) {
1780
     *                        return false;
1781
     *                        } else {
1782
     *                        return $value;
1783
     *                        }
1784
     *                        }
1785
     *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1786
     *                        </code>
1787
     *                        </p>
1788
     *
1789
     * @return mixed the filtered data, or <b>FALSE</b> if the filter fails
1790
     */
1791 2
    public static function filter_var(
        $variable,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // Forward $options only when the caller really passed a 3rd argument.
        $filtered = \func_num_args() >= 3
            ? \filter_var($variable, $filter, $options)
            : \filter_var($variable, $filter);

        return self::filter($filtered);
    }
1804
1805
    /**
1806
     * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1807
     *
1808
     * Gets multiple variables and optionally filters them
1809
     *
1810
     * @see http://php.net/manual/en/function.filter-var-array.php
1811
     *
1812
     * @param array $data       <p>
1813
     *                          An array with string keys containing the data to filter.
1814
     *                          </p>
1815
     * @param mixed $definition [optional] <p>
1816
     *                          An array defining the arguments. A valid key is a string
1817
     *                          containing a variable name and a valid value is either a
1818
     *                          filter type, or an
1819
     *                          array optionally specifying the filter, flags and options.
1820
     *                          If the value is an array, valid keys are filter
1821
     *                          which specifies the filter type,
1822
     *                          flags which specifies any flags that apply to the
1823
     *                          filter, and options which specifies any options that
1824
     *                          apply to the filter. See the example below for a better understanding.
1825
     *                          </p>
1826
     *                          <p>
1827
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1828
     *                          input array are filtered by this filter.
1829
     *                          </p>
1830
     * @param bool  $add_empty  [optional] <p>
1831
     *                          Add missing keys as <b>NULL</b> to the return value.
1832
     *                          </p>
1833
     *
1834
     * @return mixed an array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1835
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1836
     *               set
1837
     */
1838 2
    public static function filter_var_array(
        array $data,
        $definition = null,
        bool $add_empty = true
    ) {
        // Called with the data only: let the native function use its own defaults.
        if (\func_num_args() < 2) {
            return self::filter(\filter_var_array($data));
        }

        $filtered_data = \filter_var_array($data, $definition, $add_empty);

        return self::filter($filtered_data);
    }
1851
1852
    /**
1853
     * Checks whether finfo is available on the server.
1854
     *
1855
     * @return bool
1856
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
1857
     */
1858
    public static function finfo_loaded(): bool
    {
        // The check is simply whether the "finfo" class exists.
        $has_finfo_class = \class_exists('finfo');

        return $has_finfo_class;
    }
1862
1863
    /**
1864
     * Returns the first $n characters of the string.
1865
     *
1866
     * @param string $str      <p>The input string.</p>
1867
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
1868
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1869
     *
1870
     * @return string
1871
     */
1872 13
    public static function first_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to extract for a non-positive count or an empty input.
        if ($n <= 0 || $str === '') {
            return '';
        }

        // The "UTF-8" case goes straight to mbstring, everything else uses the own wrapper.
        $head = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $head;
    }
1887
1888
    /**
1889
     * Check if the number of Unicode characters is not more than the specified integer.
1890
     *
1891
     * @param string $str      the original string to be checked
1892
     * @param int    $box_size the size in number of chars to be checked against string
1893
     *
1894
     * @return bool true if string is less than or equal to $box_size, false otherwise
1895
     */
1896 2
    public static function fits_inside(string $str, int $box_size): bool
    {
        // Length as reported by the own strlen() wrapper.
        $char_count = self::strlen($str);

        return $char_count <= $box_size;
    }
1900
1901
    /**
1902
     * Try to fix simple broken UTF-8 strings.
1903
     *
1904
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1905
     *
1906
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1907
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1908
     * See: http://en.wikipedia.org/wiki/Windows-1252
1909
     *
1910
     * @param string $str <p>The input string</p>
1911
     *
1912
     * @return string
1913
     */
1914 46
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // Search / replace lists are derived once per request from the fix table.
        static $broken_sequences = null;
        static $fixed_sequences = null;

        if ($broken_sequences === null) {
            // Lazy-load the fix table itself on first use.
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $broken_sequences = \array_keys(self::$BROKEN_UTF8_FIX);
            $fixed_sequences = \array_values(self::$BROKEN_UTF8_FIX);
        }

        return \str_replace($broken_sequences, $fixed_sequences, $str);
    }
1934
1935
    /**
1936
     * Fix a double (or multiple) encoded UTF8 string.
1937
     *
1938
     * @param string|string[] $str you can use a string or an array of strings
1939
     *
1940
     * @return string|string[]
1941
     *                         Will return the fixed input-"array" or
1942
     *                         the fixed input-"string"
1943
     *
1944
     * @psalm-suppress InvalidReturnType
1945
     */
1946 2
    public static function fix_utf8($str)
    {
        // Arrays are fixed element by element (recursively), in place.
        if (\is_array($str)) {
            foreach ($str as &$item) {
                $item = self::fix_utf8($item);
            }
            unset($item);

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $current = (string) $str;
        $previous = '';

        // Decode + re-encode until the string no longer changes.
        while ($previous !== $current) {
            $previous = $current;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $decoded = self::utf8_decode($previous, true);
            $current = self::to_utf8($decoded);
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }
1977
1978
    /**
1979
     * Get the text direction ('RTL' or 'LTR') of a specific character.
1980
     *
1981
     * @param string $char
1982
     *
1983
     * @return string 'RTL' or 'LTR'
1984
     */
1985 2
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $direction_code = \IntlChar::charDirection($char);

            // from "IntlChar"-Class
            $ltr_codes = [0, 11, 12, 20];
            $rtl_codes = [1, 13, 14, 15, 21];

            if (\in_array($direction_code, $ltr_codes, true)) {
                return 'LTR';
            }

            if (\in_array($direction_code, $rtl_codes, true)) {
                return 'RTL';
            }

            // Any other code falls through to the manual lookup below.
        }

        $c = static::chr_to_decimal($char);

        // Everything outside of the overall covered span is treated as left-to-right.
        if (!($c >= 0x5be && $c <= 0x10b7f)) {
            return 'LTR';
        }

        // Code points treated as right-to-left by this fallback:
        // a plain int is a single code point (strict comparison),
        // a pair is an inclusive range [first, last].
        static $rtl_code_points = [
            0x5be,
            0x5c0,
            0x5c3,
            0x5c6,
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            0x608,
            0x60b,
            0x60d,
            0x61b,
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            0x710,
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            0x7b1,
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            0x7fa,
            [0x800, 0x815],
            0x81a,
            0x824,
            0x828,
            [0x830, 0x83e],
            [0x840, 0x858],
            0x85e,
            0x200f,
            0xfb1d,
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            0xfb3e,
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            0x10808,
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            0x1083c,
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            0x1093f,
            0x10a00,
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            [0x10b78, 0x10b7f],
        ];

        foreach ($rtl_code_points as $entry) {
            if (\is_array($entry)) {
                if ($c >= $entry[0] && $c <= $entry[1]) {
                    return 'RTL';
                }
            } elseif ($c === $entry) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2091
2092
    /**
2093
     * Check for php-support.
2094
     *
2095
     * @param string|null $key
2096
     *
2097
     * @return mixed
2098
     *               Return the full support-"array", if $key === null<br>
2099
     *               return bool-value, if $key is used and available<br>
2100
     *               otherwise return <strong>null</strong>
2101
     */
2102 27
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            // Lazy-load the transliterator list on first keyed lookup.
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            // compatibility fix for old versions
            self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

            // Unknown keys yield null.
            return self::$SUPPORT[$key] ?? null;
        }

        // No key given: hand out the whole support array.
        return self::$SUPPORT;
    }
2116
2117
    /**
2118
     * Warning: this method only works for some file-types (png, jpg)
2119
     *          if you need more supported types, please use e.g. "finfo"
2120
     *
2121
     * @param string $str
2122
     * @param array  $fallback with these keys: 'ext', 'mime', 'type'
2123
     *
2124
     * @return array
2125
     *               with these keys: 'ext', 'mime', 'type'
2126
     */
2127 39
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        if ($str === '') {
            return $fallback;
        }

        // Only the first two bytes are inspected.
        $head = \substr($str, 0, 2);
        if ($head === false || \strlen($head) !== 2) {
            return $fallback;
        }

        $bytes = \unpack('C2chars', $head);
        if ($bytes === false) {
            return $fallback;
        }

        // The decimal values of both bytes are concatenated into one number,
        // e.g. 255 and 216 => 255216.
        /** @noinspection OffsetOperationsInspection */
        $type_code = (int) ($bytes['chars1'] . $bytes['chars2']);

        //
        // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
        //
        // WARNING: do not add too simple comparisons, because of false-positive results:
        //
        // 3780 => 'pdf', 7790 => 'exe', 7784 => 'midi', 8075 => 'zip',
        // 8297 => 'rar', 7173 => 'gif', 7373 => 'tiff' 6677 => 'bmp', ...
        //
        $known_types = [
            255216 => [
                'ext'  => 'jpg',
                'mime' => 'image/jpeg',
                'type' => 'binary',
            ],
            13780 => [
                'ext'  => 'png',
                'mime' => 'image/png',
                'type' => 'binary',
            ],
        ];

        return $known_types[$type_code] ?? $fallback;
    }
2188
2189
    /**
2190
     * @param int    $length         <p>Length of the random string.</p>
2191
     * @param string $possible_chars [optional] <p>Characters string for the random selection.</p>
2192
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
2193
     *
2194
     * @return string
2195
     */
2196 1
    public static function get_random_string(
        int $length,
        string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        string $encoding = 'UTF-8'
    ): string {
        // The literal "UTF-8" encoding uses mbstring directly,
        // everything else goes through the own (encoding aware) wrappers.
        $use_mbstring = $encoding === 'UTF-8';

        if ($use_mbstring) {
            $pool_size = (int) \mb_strlen($possible_chars);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $pool_size = (int) self::strlen($possible_chars, $encoding);
        }

        // Without any possible characters there is nothing to pick from.
        if ($pool_size === 0) {
            return '';
        }

        $result = '';
        $picked = 0;

        //
        // add random chars
        //

        while ($picked < $length) {
            try {
                $index = \random_int(0, $pool_size - 1);
            } catch (\Exception $e) {
                /** @noinspection RandomApiMigrationInspection */
                $index = \mt_rand(0, $pool_size - 1);
            }

            $char = $use_mbstring
                ? \mb_substr($possible_chars, $index, 1)
                : self::substr($possible_chars, $index, 1, $encoding);

            // A failed extraction is not counted; another index is tried.
            if ($char !== false) {
                $result .= $char;
                ++$picked;
            }
        }

        return $result;
    }
2253
2254
    /**
     * Build a (practically) unique string from a random number, the session id, the
     * remote / server address (when present in $_SERVER), the caller supplied entropy
     * and "uniqid()" with more entropy enabled.
     *
     * INFO: the optional md5 step only normalizes the result to 32 hex characters;
     *       it is not meant as a security measure.
     *
     * @param int|string $entropy_extra [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $use_md5       [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     */
    public static function get_unique_string($entropy_extra = '', bool $use_md5 = true): string
    {
        // Same best-effort strategy as "get_random_string()": when no secure random source
        // is available, degrade to "mt_rand()" instead of letting the exception escape.
        try {
            $rand_int = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            /** @noinspection RandomApiMigrationInspection */
            $rand_int = \mt_rand(0, \mt_getrandmax());
        }

        $unique_helper = $rand_int .
                        \session_id() .
                        ($_SERVER['REMOTE_ADDR'] ?? '') .
                        ($_SERVER['SERVER_ADDR'] ?? '') .
                        $entropy_extra;

        $unique_string = \uniqid($unique_helper, true);

        if ($use_md5) {
            $unique_string = \md5($unique_string . $unique_helper);
        }

        return $unique_string;
    }
2276
2277
    /**
     * Deprecated alias: forwards the call unchanged to "UTF8::string_has_bom()".
     *
     * @param string $str <p>The string that is handed to "UTF8::string_has_bom()".</p>
     *
     * @return bool whatever "UTF8::string_has_bom()" returns for the given string
     *
     * @see UTF8::string_has_bom()
     * @deprecated <p>please use "UTF8::string_has_bom()"</p>
     */
    public static function hasBom(string $str): bool
    {
        $has_bom = self::string_has_bom($str);

        return $has_bom;
    }
2291
2292
    /**
     * Tell whether at least one lower case character occurs in the string.
     *
     * The check matches the POSIX class "[[:lower:]]"; it is done with "mb_ereg_match()"
     * when mbstring support is flagged as available, otherwise with the internal
     * "str_matches_pattern()" helper.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not the string contains a lower case character
     */
    public static function has_lowercase(string $str): bool
    {
        $pattern = '.*[[:lower:]]';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2308
2309
    /**
     * Tell whether at least one upper case character occurs in the string.
     *
     * The check matches the POSIX class "[[:upper:]]"; it is done with "mb_ereg_match()"
     * when mbstring support is flagged as available, otherwise with the internal
     * "str_matches_pattern()" helper.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not the string contains an upper case character
     */
    public static function has_uppercase(string $str): bool
    {
        $pattern = '.*[[:upper:]]';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2325
2326
    /**
     * Turn a hexadecimal value into the matching UTF-8 character.
     *
     * The value is converted with "hexdec()" and the resulting number is passed on to
     * "UTF8::decimal_to_chr()".
     *
     * @param string $hexdec <p>The hexadecimal value.</p>
     *
     * @return false|string one single UTF-8 character
     */
    public static function hex_to_chr(string $hexdec)
    {
        $code_point = \hexdec($hexdec);

        return self::decimal_to_chr($code_point);
    }
2337
2338
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * Accepted forms are 4 to 6 hexadecimal digits, optionally prefixed with "U+" or with a
     * literal backslash followed by "u" (e.g. "U+00f1", "\u00f1" or "00f1"). Anything else,
     * including letters outside of a-f / A-F, is rejected.
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * @param string $hexdec <p>The hexadecimal code point representation.</p>
     *
     * @return false|int the code point, or false on failure
     */
    public static function hex_to_int($hexdec)
    {
        // init
        $hexdec = (string) $hexdec;

        if ($hexdec === '') {
            return false;
        }

        // Only real hex digits are allowed in the capture group: a wider class such as
        // [a-zA-Z0-9] would let "zzzz" through and "intval()" would silently turn it into 0.
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexdec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2362
2363
    /**
     * Alias: forwards all arguments unchanged to "UTF8::html_entity_decode()".
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $flags    [optional] <p>Passed through as-is (null is passed through as well).</p>
     * @param string $encoding [optional] <p>Passed through as-is.</p>
     *
     * @return string
     *
     * @see UTF8::html_entity_decode()
     */
    public static function html_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2381
2382
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * With mbstring available the work is done by "mb_encode_numericentity()"; otherwise the
     * string is split via "UTF8::str_split()" and every piece is encoded on its own via
     * "UTF8::single_chr_html_encode()".
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * @param string $str              <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keep_ascii_chars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(
        string $str,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // first code point that gets encoded: 0x80 leaves the ASCII range untouched
            $start_code = $keep_ascii_chars === true ? 0x80 : 0x00;

            // A convmap entry is exactly [start, end, offset, mask]. The element count must be
            // a multiple of 4, otherwise PHP >= 8.0 throws a ValueError.
            $convmap = [$start_code, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                return \mb_encode_numericentity($str, $convmap);
            }

            return \mb_encode_numericentity($str, $convmap, $encoding);
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keep_ascii_chars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2441
2442
    /**
     * UTF-8 version of html_entity_decode()
     *
     * The reason we are not using html_entity_decode() by itself is because
     * while it is not technically correct to leave out the semicolon
     * at the end of an entity most browsers will still interpret the entity
     * correctly. html_entity_decode() does not convert entities without
     * semicolons, so numeric entities get their missing ";" appended here first.
     *
     * The decoding pass is repeated until the string no longer changes, so input
     * that was encoded more than once (e.g. "&amp;lt;") is decoded completely.
     *
     * Strings shorter than 4 bytes, or without any "&", are returned unchanged.
     *
     * INFO: opposite to UTF8::html_encode()
     *
     * @see http://php.net/manual/en/function.html-entity-decode.php
     *
     * @param string $str      <p>
     *                         The input string.
     *                         </p>
     * @param int    $flags    [optional] <p>
     *                         A bitmask of one or more of the following flags, which specify how to handle quotes
     *                         and which document type to use. When null (the default) this method uses
     *                         ENT_QUOTES | ENT_HTML5.
     *                         <ul>
     *                         <li><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes alone.</li>
     *                         <li><b>ENT_QUOTES</b>: Will convert both double and single quotes.</li>
     *                         <li><b>ENT_NOQUOTES</b>: Will leave both double and single quotes unconverted.</li>
     *                         <li><b>ENT_HTML401</b>: Handle code as HTML 4.01.</li>
     *                         <li><b>ENT_XML1</b>: Handle code as XML 1.</li>
     *                         <li><b>ENT_XHTML</b>: Handle code as XHTML.</li>
     *                         <li><b>ENT_HTML5</b>: Handle code as HTML 5.</li>
     *                         </ul>
     *                         </p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the decoded string
     */
    public static function html_entity_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        if (
            !isset($str[3]) // examples: &; || &x;
            ||
            \strpos($str, '&') === false // no "&"
        ) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // A convmap entry is exactly [start, end, offset, mask]. The element count must be
        // a multiple of 4, otherwise PHP >= 8.0 throws a ValueError.
        $convmap = [0x80, 0xfffff, 0, 0xfffff];

        do {
            // remember the input of this pass, the loop stops once a pass changes nothing
            $str_compare = $str;

            // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
            if (self::$SUPPORT['mbstring'] === true) {
                if ($encoding === 'UTF-8') {
                    $str = \mb_decode_numericentity(
                        $str,
                        $convmap
                    );
                } else {
                    $str = \mb_decode_numericentity(
                        $str,
                        $convmap,
                        $encoding
                    );
                }
            } else {
                $str = (string) \preg_replace_callback(
                    "/&#\d{2,6};/",
                    /**
                     * @param string[] $matches
                     *
                     * @return string
                     */
                    static function (array $matches) use ($encoding): string {
                        $return_tmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');
                        // quotes stay encoded here, they are handled by "html_entity_decode()" + $flags below
                        if ($return_tmp !== '"' && $return_tmp !== "'") {
                            return $return_tmp;
                        }

                        return $matches[0];
                    },
                    $str
                );
            }

            if (\strpos($str, '&') !== false) {
                if (\strpos($str, '&#') !== false) {
                    // decode also numeric & UTF16 two byte entities
                    // (append the missing ";" so that "html_entity_decode()" recognizes them)
                    $str = (string) \preg_replace(
                        '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                        '$1;',
                        $str
                    );
                }

                $str = \html_entity_decode(
                    $str,
                    $flags,
                    $encoding
                );
            }
        } while ($str_compare !== $str);

        return $str;
    }
2601
2602
    /**
     * Escape a string for HTML output by calling "UTF8::htmlspecialchars()" with the
     * flags ENT_QUOTES | ENT_SUBSTITUTE.
     *
     * @param string $str      <p>The string to escape.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }
2618
2619
    /**
     * Strip html-tags that have no content (or only whitespace) between the opening
     * and the closing tag.
     *
     * e.g.: <tag></tag>
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the input without the matched tag pairs
     */
    public static function html_stripe_empty_tags(string $str): string
    {
        // opening tag (without any "/"), optional whitespace, then a closing tag
        $empty_tag_pattern = '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u';

        $stripped = \preg_replace($empty_tag_pattern, '', $str);

        return (string) $stripped;
    }
2636
2637
    /**
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
     *
     * The string is first run through PHP's htmlentities(), then every backslash is replaced
     * by "&#92;" and finally the result is passed to "UTF8::html_encode()" with ASCII chars
     * kept as they are.
     *
     * @see http://php.net/manual/en/function.htmlentities.php
     *
     * @param string $str           <p>
     *                              The input string.
     *                              </p>
     * @param int    $flags         [optional] <p>
     *                              A bitmask of one or more of the following flags, which specify how to handle
     *                              quotes, invalid code unit sequences and the used document type. The default
     *                              of this method is ENT_COMPAT.
     *                              <ul>
     *                              <li><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes
     *                              alone.</li>
     *                              <li><b>ENT_QUOTES</b>: Will convert both double and single quotes.</li>
     *                              <li><b>ENT_NOQUOTES</b>: Will leave both double and single quotes
     *                              unconverted.</li>
     *                              <li><b>ENT_IGNORE</b>: Silently discard invalid code unit sequences instead
     *                              of returning an empty string. Using this flag is discouraged as it may have
     *                              security implications.</li>
     *                              <li><b>ENT_SUBSTITUTE</b>: Replace invalid code unit sequences with a Unicode
     *                              Replacement Character U+FFFD (UTF-8) or &#38;#xFFFD; (otherwise) instead of
     *                              returning an empty string.</li>
     *                              <li><b>ENT_DISALLOWED</b>: Replace invalid code points for the given document
     *                              type with a Unicode Replacement Character U+FFFD (UTF-8) or &#38;#xFFFD;
     *                              (otherwise) instead of leaving them as is. This may be useful, for instance,
     *                              to ensure the well-formedness of XML documents with embedded external
     *                              content.</li>
     *                              <li><b>ENT_HTML401</b>: Handle code as HTML 4.01.</li>
     *                              <li><b>ENT_XML1</b>: Handle code as XML 1.</li>
     *                              <li><b>ENT_XHTML</b>: Handle code as XHTML.</li>
     *                              <li><b>ENT_HTML5</b>: Handle code as HTML 5.</li>
     *                              </ul>
     *                              </p>
     * @param string $encoding      [optional] <p>
     *                              Like <b>htmlspecialchars</b>,
     *                              <b>htmlentities</b> takes an optional third argument
     *                              <i>encoding</i> which defines encoding used in
     *                              conversion.
     *                              Although this argument is technically optional, you are highly
     *                              encouraged to specify the correct value for your code.
     *                              Any value other than "UTF-8" or "CP850" is passed through
     *                              "UTF8::normalize_encoding()" first.
     *                              </p>
     * @param bool   $double_encode [optional] <p>
     *                              When <i>double_encode</i> is turned off PHP will not
     *                              encode existing html entities. The default is to convert everything.
     *                              </p>
     *
     * @return string
     *                <p>
     *                The encoded string.
     *                <br><br>
     *                If the input <i>string</i> contains an invalid code unit
     *                sequence within the given <i>encoding</i> an empty string
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
     *                <b>ENT_SUBSTITUTE</b> flags are set.
     *                </p>
     */
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        $is_ready_to_use = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_ready_to_use) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        /**
         * PHP's htmlentities() leaves the backslash alone, because it is typically used for
         * escaping when inserting into a database. That is not needed here, so each
         * backslash is replaced by its html entity equivalent.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace('\\', '&#92;', $encoded);

        return self::html_encode($encoded, true, $encoding);
    }
2771
2772
    /**
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
     *
     * Thin wrapper around PHP's htmlspecialchars(): the only extra step is that an
     * encoding other than "UTF-8" or "CP850" is passed through
     * "UTF8::normalize_encoding()" before the native function is called.
     *
     * INFO: Take a look at "UTF8::htmlentities()"
     *
     * @see http://php.net/manual/en/function.htmlspecialchars.php
     *
     * @param string $str           <p>
     *                              The string being converted.
     *                              </p>
     * @param int    $flags         [optional] <p>
     *                              A bitmask of one or more of the following flags, which specify how to handle
     *                              quotes, invalid code unit sequences and the used document type. The default
     *                              of this method is ENT_COMPAT.
     *                              <ul>
     *                              <li><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes
     *                              alone.</li>
     *                              <li><b>ENT_QUOTES</b>: Will convert both double and single quotes.</li>
     *                              <li><b>ENT_NOQUOTES</b>: Will leave both double and single quotes
     *                              unconverted.</li>
     *                              <li><b>ENT_IGNORE</b>: Silently discard invalid code unit sequences instead
     *                              of returning an empty string. Using this flag is discouraged as it may have
     *                              security implications.</li>
     *                              <li><b>ENT_SUBSTITUTE</b>: Replace invalid code unit sequences with a Unicode
     *                              Replacement Character U+FFFD (UTF-8) or &#38;#xFFFD; (otherwise) instead of
     *                              returning an empty string.</li>
     *                              <li><b>ENT_DISALLOWED</b>: Replace invalid code points for the given document
     *                              type with a Unicode Replacement Character U+FFFD (UTF-8) or &#38;#xFFFD;
     *                              (otherwise) instead of leaving them as is. This may be useful, for instance,
     *                              to ensure the well-formedness of XML documents with embedded external
     *                              content.</li>
     *                              <li><b>ENT_HTML401</b>: Handle code as HTML 4.01.</li>
     *                              <li><b>ENT_XML1</b>: Handle code as XML 1.</li>
     *                              <li><b>ENT_XHTML</b>: Handle code as XHTML.</li>
     *                              <li><b>ENT_HTML5</b>: Handle code as HTML 5.</li>
     *                              </ul>
     *                              </p>
     * @param string $encoding      [optional] <p>
     *                              Defines encoding used in conversion.
     *                              </p>
     *                              <p>
     *                              For the purposes of this function, the encodings
     *                              ISO-8859-1, ISO-8859-15,
     *                              UTF-8, cp866,
     *                              cp1251, cp1252, and
     *                              KOI8-R are effectively equivalent, provided the
     *                              <i>string</i> itself is valid for the encoding, as
     *                              the characters affected by <b>htmlspecialchars</b> occupy
     *                              the same positions in all of these encodings.
     *                              </p>
     * @param bool   $double_encode [optional] <p>
     *                              When <i>double_encode</i> is turned off PHP will not
     *                              encode existing html entities, the default is to convert everything.
     *                              </p>
     *
     * @return string
     *                <p>
     *                The converted string.
     *                <br><br>
     *                If the input <i>string</i> contains an invalid code unit
     *                sequence within the given <i>encoding</i> an empty string
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
     *                <b>ENT_SUBSTITUTE</b> flags are set.
     *                </p>
     */
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        $is_ready_to_use = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_ready_to_use) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $converted;
    }
2899
2900
    /**
     * Reports whether the "iconv" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function iconv_loaded(): bool
    {
        $extension = 'iconv';

        return \extension_loaded($extension);
    }
2910
2911
    /**
     * Alias: forwards the value unchanged to "UTF8::decimal_to_chr()".
     *
     * @param mixed $int <p>The value that is handed to "UTF8::decimal_to_chr()".</p>
     *
     * @return string
     *
     * @see UTF8::decimal_to_chr()
     */
    public static function int_to_chr($int): string
    {
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
2924
2925
    /**
     * Converts Integer to hexadecimal U+xxxx code point representation.
     *
     * The hexadecimal digits come from "dechex()" (lower case) and are left-padded with
     * zeros to at least 4 digits; longer values are kept as they are.
     *
     * INFO: opposite to UTF8::hex_to_int()
     *
     * @param int    $int    <p>The integer to be converted to hexadecimal code point.</p>
     * @param string $prefix [optional] <p>Put in front of the hexadecimal digits.</p>
     *
     * @return string the prefix followed by the hexadecimal digits
     */
    public static function int_to_hex(int $int, string $prefix = 'U+'): string
    {
        $digits = \dechex($int);

        if (\strlen($digits) < 4) {
            // left-pad with zeros up to 4 digits
            $digits = \substr('0000' . $digits, -4);
        }

        return $prefix . $digits;
    }
2943
2944
    /**
     * Reports whether the "IntlChar" class exists on this PHP installation.
     *
     * @return bool
     *              <strong>true</strong> if the class exists, <strong>false</strong> otherwise
     */
    public static function intlChar_loaded(): bool
    {
        $has_intl_char = \class_exists('IntlChar');

        return $has_intl_char;
    }
2954
2955
    /**
     * Reports whether the "intl" PHP extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise
     */
    public static function intl_loaded(): bool
    {
        $is_loaded = \extension_loaded('intl');

        return $is_loaded;
    }
2965
2966
    /**
     * Alias for "UTF8::is_ascii()"; delegates directly to ASCII::is_ascii().
     *
     * @param string $str <p>The string to check.</p>
     *
     * @return bool
     *
     * @see UTF8::is_ascii()
     * @deprecated <p>please use "UTF8::is_ascii()"</p>
     */
    public static function isAscii(string $str): bool
    {
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
2980
2981
    /**
     * Alias for "UTF8::is_base64()", called with its default second argument.
     *
     * @param mixed $str <p>The value to check.</p>
     *
     * @return bool
     *
     * @see UTF8::is_base64()
     * @deprecated <p>please use "UTF8::is_base64()"</p>
     */
    public static function isBase64($str): bool
    {
        $is_base64 = self::is_base64($str);

        return $is_base64;
    }
2995
2996
    /**
     * Alias for "UTF8::is_binary()"; both arguments are forwarded unchanged.
     *
     * @param mixed $str    <p>The value to check.</p>
     * @param bool  $strict <p>Forwarded as the "$strict" argument of is_binary().</p>
     *
     * @return bool
     *
     * @see UTF8::is_binary()
     * @deprecated <p>please use "UTF8::is_binary()"</p>
     */
    public static function isBinary($str, $strict = false): bool
    {
        $is_binary = self::is_binary($str, $strict);

        return $is_binary;
    }
3011
3012
    /**
     * Alias for "UTF8::is_bom()".
     *
     * @param string $utf8_chr <p>The string to compare against the known byte order marks.</p>
     *
     * @return bool
     *
     * @see UTF8::is_bom()
     * @deprecated <p>please use "UTF8::is_bom()"</p>
     */
    public static function isBom(string $utf8_chr): bool
    {
        $is_bom = self::is_bom($utf8_chr);

        return $is_bom;
    }
3026
3027
    /**
     * Alias for "UTF8::is_html()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *
     * @see UTF8::is_html()
     * @deprecated <p>please use "UTF8::is_html()"</p>
     */
    public static function isHtml(string $str): bool
    {
        $is_html = self::is_html($str);

        return $is_html;
    }
3041
3042
    /**
     * Alias for "UTF8::is_json()", called with its default second argument.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *
     * @see UTF8::is_json()
     * @deprecated <p>please use "UTF8::is_json()"</p>
     */
    public static function isJson(string $str): bool
    {
        $is_json = self::is_json($str);

        return $is_json;
    }
3056
3057
    /**
     * Alias for "UTF8::is_utf16()", called with its default second argument.
     *
     * @param mixed $str <p>The input string.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     *
     * @see UTF8::is_utf16()
     * @deprecated <p>please use "UTF8::is_utf16()"</p>
     */
    public static function isUtf16($str)
    {
        $result = self::is_utf16($str);

        return $result;
    }
3074
3075
    /**
     * Alias for "UTF8::is_utf32()", called with its default second argument.
     *
     * @param mixed $str <p>The input string.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     *
     * @see UTF8::is_utf32()
     * @deprecated <p>please use "UTF8::is_utf32()"</p>
     */
    public static function isUtf32($str)
    {
        $result = self::is_utf32($str);

        return $result;
    }
3092
3093
    /**
     * Alias for "UTF8::is_utf8()"; both arguments are forwarded unchanged.
     *
     * @param mixed $str    <p>The input to be checked (anything is_utf8() accepts).</p>
     * @param bool  $strict <p>Forwarded as the "$strict" argument of is_utf8().</p>
     *
     * @return bool
     *
     * @see UTF8::is_utf8()
     * @deprecated <p>please use "UTF8::is_utf8()"</p>
     */
    public static function isUtf8($str, $strict = false): bool
    {
        $is_utf8 = self::is_utf8($str, $strict);

        return $is_utf8;
    }
3108
3109
    /**
     * Tests the string against the pattern "^[[:alpha:]]*$" (alphabetic chars only).
     *
     * The pattern allows zero characters. With mbstring support the check is
     * done by mb_ereg_match(), otherwise by UTF8::str_matches_pattern().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only alphabetic chars
     */
    public static function is_alpha(string $str): bool
    {
        $pattern = '^[[:alpha:]]*$';

        /** @noinspection PhpComposerExtensionStubsInspection */
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($pattern, $str)
            : self::str_matches_pattern($str, $pattern);
    }
3126
3127
    /**
     * Tests the string against the pattern "^[[:alnum:]]*$" (alphabetic and numeric chars only).
     *
     * The pattern allows zero characters. With mbstring support the check is
     * done by mb_ereg_match(), otherwise by UTF8::str_matches_pattern().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only alphanumeric chars
     */
    public static function is_alphanumeric(string $str): bool
    {
        $pattern = '^[[:alnum:]]*$';

        /** @noinspection PhpComposerExtensionStubsInspection */
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($pattern, $str)
            : self::str_matches_pattern($str, $pattern);
    }
3144
3145
    /**
     * Checks if a string is 7 bit ASCII; the check itself is done by ASCII::is_ascii().
     *
     * @param string $str <p>The string to check.</p>
     *
     * @return bool
     *              <strong>true</strong> if it is ASCII<br>
     *              <strong>false</strong> otherwise
     */
    public static function is_ascii(string $str): bool
    {
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
3158
3159
    /**
     * Returns true if the value is a base64 encoded string, false otherwise.
     *
     * The value must be a string that decodes in strict mode and re-encodes to
     * exactly the same text.
     *
     * @param mixed|string $str                   <p>The input string; any non-string value yields false.</p>
     * @param bool         $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p>
     *
     * @return bool whether or not $str is base64 encoded
     */
    public static function is_base64($str, $empty_string_is_valid = false): bool
    {
        // an empty string only counts when the caller explicitly allows it
        if ($str === '' && $empty_string_is_valid === false) {
            return false;
        }

        if (!\is_string($str)) {
            return false;
        }

        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // round-trip: the canonical encoding of the decoded bytes must equal the input
        return \base64_encode($decoded) === $str;
    }
3188
3189
    /**
     * Heuristically checks whether the input looks like binary data.
     *
     * The input is cast to string and the checks run in this order:
     * an empty string is never binary; a string made only of "0" and "1" is
     * binary; a "binary" type reported by UTF8::get_file_type() is binary;
     * more than 25% NUL bytes is binary; and, in strict mode only, a "binary"
     * mime encoding reported by ext-fileinfo is binary.
     *
     * @param mixed $input  <p>The value to inspect.</p>
     * @param bool  $strict <p>Also ask ext-fileinfo for the mime encoding.</p>
     *
     * @throws \RuntimeException if $strict is true, no earlier check matched and the "finfo" support flag is false
     *
     * @return bool
     */
    public static function is_binary($input, bool $strict = false): bool
    {
        $input = (string) $input;
        if ($input === '') {
            return false;
        }

        // strings like "0101" are treated as binary
        if (\preg_match('~^[01]+$~', $input) === 1) {
            return true;
        }

        $file_type = self::get_file_type($input);
        if ($file_type['type'] === 'binary') {
            return true;
        }

        // share of NUL bytes in the whole input
        $null_byte_ratio = \substr_count($input, "\x0") / \strlen($input);
        if ($null_byte_ratio > 0.25) {
            return true;
        }

        if ($strict !== true) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $mime_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

        return $mime_encoding === 'binary';
    }
3233
3234
    /**
     * Check if the file is binary.
     *
     * Reads up to the first 512 bytes of the file and runs them through
     * UTF8::is_binary() in strict mode. Returns false when the file cannot be
     * opened or the read yields an empty string.
     *
     * @param string $file <p>Path passed to fopen().</p>
     *
     * @return bool
     */
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (!\is_resource($handle)) {
            return false;
        }

        $first_bytes = \fread($handle, 512);
        \fclose($handle);

        if ($first_bytes === '') {
            return false;
        }

        return self::is_binary($first_bytes, true);
    }
3258
3259
    /**
     * Tests the string against the pattern "^[[:space:]]*$" (whitespace chars only).
     *
     * The pattern allows zero characters. With mbstring support the check is
     * done by mb_ereg_match(), otherwise by UTF8::str_matches_pattern().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only whitespace characters
     */
    public static function is_blank(string $str): bool
    {
        $pattern = '^[[:space:]]*$';

        /** @noinspection PhpComposerExtensionStubsInspection */
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($pattern, $str)
            : self::str_matches_pattern($str, $pattern);
    }
3276
3277
    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * The value is compared strictly (===) against every key of self::$BOM.
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise
     */
    public static function is_bom($str): bool
    {
        // Only the keys matter here; reading them by value avoids turning the
        // entries of the shared static array into references.
        return \in_array($str, \array_keys(self::$BOM), true);
    }
3298
3299
    /**
     * Determine whether the value is considered to be empty.
     *
     * A value is empty when PHP treats it as false in a boolean context,
     * e.g. "", "0", 0, 0.0, null, false or an empty array.
     *
     * @param mixed $str <p>The value to test.</p>
     *
     * @return bool whether or not $str is empty()
     */
    public static function is_empty($str): bool
    {
        // for a variable that exists, empty($x) is the same as !$x
        return !$str;
    }
3313
3314
    /**
     * Tests the string against the pattern "^[[:xdigit:]]*$" (hexadecimal chars only).
     *
     * The pattern allows zero characters. With mbstring support the check is
     * done by mb_ereg_match(), otherwise by UTF8::str_matches_pattern().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only hexadecimal chars
     */
    public static function is_hexadecimal(string $str): bool
    {
        $pattern = '^[[:xdigit:]]*$';

        /** @noinspection PhpComposerExtensionStubsInspection */
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($pattern, $str)
            : self::str_matches_pattern($str, $pattern);
    }
3331
3332
    /**
     * Check if the string contains any html-tags.
     *
     * The string is first passed through UTF8::emoji_encode() and then
     * searched for an opening or closing tag with a regular expression.
     * An empty string never contains html.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        $str = self::emoji_encode($str); // hack for emoji support :/

        $tag_found = \preg_match("/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u", $str);

        // preg_match() gives 1 on a match, 0 on no match and false on error
        return $tag_found === 1;
    }
3354
3355
    /**
     * Try to check if "$str" is a json-string.
     *
     * The string is decoded with UTF8::json_decode(). A null result is only
     * accepted when the input itself is the literal "null" (case-insensitive).
     *
     * @param string $str                                    <p>The input string.</p>
     * @param bool   $only_array_or_object_results_are_valid [optional] <p>Only array and objects are valid json results.</p>
     *
     * @throws \RuntimeException if the "json" support flag is false
     *
     * @return bool
     */
    public static function is_json(
        string $str,
        $only_array_or_object_results_are_valid = true
    ): bool {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        $is_null_literal = \strtoupper($str) === 'NULL';
        if ($decoded === null && !$is_null_literal) {
            return false;
        }

        $is_array_or_object = \is_object($decoded) || \is_array($decoded);
        if ($only_array_or_object_results_are_valid === true && !$is_array_or_object) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3393
3394
    /**
     * Tests the string against the pattern "^[[:lower:]]*$" (lower case chars only).
     *
     * The pattern allows zero characters. With mbstring support the check is
     * done by mb_ereg_match(), otherwise by UTF8::str_matches_pattern().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not $str contains only lower case characters.</p>
     */
    public static function is_lowercase(string $str): bool
    {
        $pattern = '^[[:lower:]]*$';

        /** @noinspection PhpComposerExtensionStubsInspection */
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($pattern, $str)
            : self::str_matches_pattern($str, $pattern);
    }
3408
3409
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The string is probed with unserialize(). Class instantiation is disabled
     * ("allowed_classes" => false), so checking an untrusted string cannot run
     * magic methods such as __wakeup() or __destruct(); serialized objects
     * still count as serialized.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not $str is serialized
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // "b:0;" is a serialized false, which unserialize() cannot tell apart from a failure
        if ($str === 'b:0;') {
            return true;
        }

        // "@" hides the notice/warning that unserialize() emits for invalid input
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3428
3429
    /**
     * Tests the string against the pattern "^[[:upper:]]*$" (upper case chars only).
     *
     * The pattern allows zero characters. With mbstring support the check is
     * done by mb_ereg_match(), otherwise by UTF8::str_matches_pattern().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not $str contains only upper case characters.</p>
     */
    public static function is_uppercase(string $str): bool
    {
        $pattern = '^[[:upper:]]*$';

        /** @noinspection PhpComposerExtensionStubsInspection */
        return self::$SUPPORT['mbstring'] === true
            ? \mb_ereg_match($pattern, $str)
            : self::str_matches_pattern($str, $pattern);
    }
3447
3448
    /**
     * Check if the string is UTF-16.
     *
     * Each byte order (LE, then BE) is scored like this: the input is
     * converted to UTF-8, back to the candidate encoding and to UTF-8 again.
     * If that round trip is stable, every key of UTF8::count_chars() on the
     * converted text that is also found in UTF8::count_chars($str, true, false)
     * adds one point. The byte order with more points wins; a tie means "not UTF-16".
     *
     * @param mixed $str                       <p>The input string.</p>
     * @param bool  $check_if_string_is_binary <p>If true, input that UTF8::is_binary() (strict mode) rejects is reported as not UTF-16.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, $check_if_string_is_binary = true)
    {
        $str = (string) $str;

        if (
            $check_if_string_is_binary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // computed lazily, at most once per successful round trip chain
        $str_chars = [];
        $hits = ['UTF-16LE' => 0, 'UTF-16BE' => 0];

        foreach (['UTF-16LE', 'UTF-16BE'] as $candidate) {
            $test = \mb_convert_encoding($str, 'UTF-8', $candidate);
            if (!$test) {
                continue;
            }

            $test2 = \mb_convert_encoding($test, $candidate, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $candidate);
            if ($test3 !== $test) {
                continue;
            }

            if (\count($str_chars) === 0) {
                $str_chars = self::count_chars($str, true, false);
            }

            // Only the keys are needed; iterating them by value avoids a
            // by-reference foreach over a function result.
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true) === true) {
                    ++$hits[$candidate];
                }
            }
        }

        if ($hits['UTF-16LE'] === $hits['UTF-16BE']) {
            return false;
        }

        return $hits['UTF-16LE'] > $hits['UTF-16BE'] ? 1 : 2;
    }
3525
3526
    /**
     * Check if the string is UTF-32.
     *
     * Each byte order (LE, then BE) is scored like this: the input is
     * converted to UTF-8, back to the candidate encoding and to UTF-8 again.
     * If that round trip is stable, every key of UTF8::count_chars() on the
     * converted text that is also found in UTF8::count_chars($str, true, false)
     * adds one point. The byte order with more points wins; a tie means "not UTF-32".
     *
     * @param mixed $str                       <p>The input string.</p>
     * @param bool  $check_if_string_is_binary <p>If true, input that UTF8::is_binary() (strict mode) rejects is reported as not UTF-32.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, $check_if_string_is_binary = true)
    {
        $str = (string) $str;

        if (
            $check_if_string_is_binary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // computed lazily, at most once per successful round trip chain
        $str_chars = [];
        $hits = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

        foreach (['UTF-32LE', 'UTF-32BE'] as $candidate) {
            $test = \mb_convert_encoding($str, 'UTF-8', $candidate);
            if (!$test) {
                continue;
            }

            $test2 = \mb_convert_encoding($test, $candidate, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $candidate);
            if ($test3 !== $test) {
                continue;
            }

            if (\count($str_chars) === 0) {
                $str_chars = self::count_chars($str, true, false);
            }

            // Only the keys are needed; iterating them by value avoids a
            // by-reference foreach over a function result.
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true) === true) {
                    ++$hits[$candidate];
                }
            }
        }

        if ($hits['UTF-32LE'] === $hits['UTF-32BE']) {
            return false;
        }

        return $hits['UTF-32LE'] > $hits['UTF-32BE'] ? 1 : 2;
    }
3603
3604
    /**
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
     *
     * Arrays are checked element by element (recursively); the first element that fails
     * makes the whole check fail. Any other input is cast to string and handed to
     * "UTF8::is_utf8_string()".
     *
     * @param int|string|string[]|null $str    <p>The input to be checked.</p>
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str) === true) {
            // iterate by value: nothing is written back, so a reference would only
            // force a copy of the array and leave a dangling reference behind
            foreach ($str as $v) {
                if (self::is_utf8($v, $strict) === false) {
                    return false;
                }
            }

            return true;
        }

        return self::is_utf8_string((string) $str, $strict);
    }
3626
3627
    /**
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
     * Decodes a JSON string.
     *
     * The input is passed through "UTF8::filter()" first and then handed to the
     * native "json_decode()" function.
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>
     *                        The <i>json</i> string being decoded.
     *                        </p>
     *                        <p>
     *                        This function only works with UTF-8 encoded strings.
     *                        </p>
     *                        <p>PHP implements a superset of
     *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                        only supports these values when they are nested inside an array or an object.
     *                        </p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>
     *                        User specified recursion depth.
     *                        </p>
     * @param int    $options [optional] <p>
     *                        Bitmask of JSON decode options, passed through unchanged
     *                        to the native function.
     *                        </p>
     *
     * @throws \RuntimeException if "ext-json" is not available
     *
     * @return mixed
     *               The value encoded in <i>json</i> in appropriate PHP type. Values true, false and
     *               null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b> and <b>NULL</b> respectively.
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
     *               is deeper than the recursion limit.
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        $filtered_json = self::filter($json);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_decode($filtered_json, $assoc, $depth, $options);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
3677
3678
    /**
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
     * Returns the JSON representation of a value.
     *
     * The value is passed through "UTF8::filter()" first and then handed to the
     * native "json_encode()" function.
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>
     *                       The <i>value</i> being encoded. Can be any type except
     *                       a resource.
     *                       </p>
     *                       <p>
     *                       All string data must be UTF-8 encoded.
     *                       </p>
     *                       <p>PHP implements a superset of
     *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                       only supports these values when they are nested inside an array or an object.
     *                       </p>
     * @param int   $options [optional] <p>
     *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
     *                       <b>JSON_HEX_TAG</b>,
     *                       <b>JSON_HEX_AMP</b>,
     *                       <b>JSON_HEX_APOS</b>,
     *                       <b>JSON_NUMERIC_CHECK</b>,
     *                       <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_SLASHES</b>,
     *                       <b>JSON_FORCE_OBJECT</b>,
     *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
     *                       constants is described on
     *                       the JSON constants page.
     *                       </p>
     * @param int   $depth   [optional] <p>
     *                       Set the maximum depth. Must be greater than zero.
     *                       </p>
     *
     * @throws \RuntimeException if "ext-json" is not available
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filtered_value = self::filter($value);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_encode($filtered_value, $options, $depth);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
3727
3728
    /**
     * Checks whether JSON is available on the server.
     *
     * The check is done by probing for the native "json_decode()" function.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function json_loaded(): bool
    {
        $has_json_decode = \function_exists('json_decode');

        return $has_json_decode;
    }
3738
3739
    /**
     * Makes string's first char lowercase.
     *
     * For plain UTF-8 input without a language or length constraint the native
     * "mb_" functions are used directly; every other combination is routed through
     * "UTF8::substr()" / "UTF8::strtolower()".
     *
     * @param string      $str                           <p>The input string</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        // non UTF-8 input: normalize the encoding name and use the encoding-aware helpers
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $rest = (string) self::substr($str, 1, null, $encoding);
            $first_char = (string) self::substr($str, 0, 1, $encoding);

            return self::strtolower(
                $first_char,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            ) . $rest;
        }

        $rest = (string) \mb_substr($str, 1);
        $first_char = (string) \mb_substr($str, 0, 1);

        // fast path: no language special cases and no length constraint requested
        if ($lang === null && $try_to_keep_the_string_length === false) {
            return \mb_strtolower($first_char) . $rest;
        }

        return self::strtolower(
            $first_char,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        ) . $rest;
    }
3795
3796
    /**
     * alias for "UTF8::lcfirst()"
     *
     * All arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @return string
     *
     * @see UTF8::lcfirst()
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $lowercased = self::lcfirst(
            $str,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );

        return $lowercased;
    }
3818
3819
    /**
     * Lowercase for all words in the string.
     *
     * The string is split via "UTF8::str_to_words()", the first char of every word
     * that is not listed in $exceptions is lowercased via "UTF8::lcfirst()", and the
     * parts are glued together again.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string[]    $exceptions                    [optional] <p>Exclusion for some words.</p>
     * @param string      $char_list                     [optional] <p>Additional chars that contains to words and do not start
     *                                                   a new word.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // compare explicitly: a truthiness check would also swallow the string "0"
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = \count($exceptions) > 0;

        foreach ($words as &$word) {
            // nothing to lowercase in an empty part
            if ($word === '') {
                continue;
            }

            if ($use_exceptions === true && \in_array($word, $exceptions, true)) {
                continue;
            }

            $word = self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }
        // break the reference, so that later use of $word cannot modify the last element
        unset($word);

        return \implode('', $words);
    }
3865
3866
    /**
     * alias for "UTF8::lcfirst()"
     *
     * All arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @return string
     *
     * @see UTF8::lcfirst()
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $lowercased = self::lcfirst(
            $str,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );

        return $lowercased;
    }
3888
3889
    /**
     * Strip whitespace or other characters from beginning of a UTF-8 string.
     *
     * Without a char list (null or empty string) leading whitespace is stripped.
     * With a char list, every leading character contained in that list is stripped;
     * the list is quoted before it is placed into the character class of the pattern.
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped</p>
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // compare explicitly: a truthiness check would treat the char list "0" as "not given"
        if ($chars !== null && $chars !== '') {
            $chars = \preg_quote($chars, '/');
            // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2
            $pattern = "^[{$chars}]+";
        } else {
            $pattern = '^[\\s]+';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
3917
3918
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * An array is joined into one string before its code points are collected.
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @return string|null the character with the highest code point, or null if no code point was found
     */
    public static function max($arg)
    {
        $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input, false);

        return \count($codepoints) === 0
            ? null
            : self::chr(\max($codepoints));
    }
3940
3941
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @return int the largest value from "UTF8::chr_size_list()", or 0 if that list is empty
     */
    public static function max_chr_width(string $str): int
    {
        $byte_lengths = self::chr_size_list($str);

        return \count($byte_lengths) > 0 ? (int) \max($byte_lengths) : 0;
    }
3958
3959
    /**
     * Checks whether mbstring is available on the server.
     *
     * @return bool
     *              <strong>true</strong> if the "mbstring" extension is loaded, <strong>false</strong> otherwise
     */
    public static function mbstring_loaded(): bool
    {
        $is_loaded = \extension_loaded('mbstring');

        return $is_loaded;
    }
3969
3970
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * An array is joined into one string before its code points are collected.
     *
     * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @return string|null the character with the lowest code point, or null if no code point was found
     */
    public static function min($arg)
    {
        $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input, false);

        return \count($codepoints) === 0
            ? null
            : self::chr(\min($codepoints));
    }
3992
3993
    /**
     * alias for "UTF8::normalize_encoding()"
     *
     * Both arguments are forwarded unchanged.
     *
     * @param mixed $encoding
     * @param mixed $fallback
     *
     * @return mixed
     *
     * @see UTF8::normalize_encoding()
     * @deprecated <p>please use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4008
4009
    /**
     * Normalize the encoding-"name" input.
     *
     * Lookup order:
     * 1. empty input ("" or "0") and "1" return $fallback
     * 2. a few very common names are mapped directly (UTF-8, CP850, HTML-ENTITIES)
     * 3. the per-request cache
     * 4. the list of known encodings (the name is returned unchanged)
     * 5. the alias table below, matched against the upper-cased name stripped of
     *    everything except letters and digits
     *
     * A name that matches nothing is returned upper-cased.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        // init
        $encoding = (string) $encoding;

        // "" and "0" are both falsy here, so "0" needs no separate check further down
        if (!$encoding) {
            return $fallback;
        }

        if ($encoding === 'UTF-8' || $encoding === 'UTF8') {
            return 'UTF-8';
        }

        if ($encoding === '8BIT' || $encoding === 'BINARY') {
            return 'CP850';
        }

        if ($encoding === 'HTML' || $encoding === 'HTML-ENTITIES') {
            return 'HTML-ENTITIES';
        }

        // only a fallback, for non "strict_types" usage ...
        if ($encoding === '1') {
            return $fallback;
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        $encoding_original = $encoding;
        $encoding = \strtoupper($encoding);
        // alias lookup key: only letters and digits, e.g. "ISO-8859-1" -> "ISO88591"
        $encoding_upper_helper = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $encoding);

        $equivalences = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        if (!empty($equivalences[$encoding_upper_helper])) {
            $encoding = $equivalences[$encoding_upper_helper];
        }

        // cached under the name the caller passed in, so the next call returns early
        $STATIC_NORMALIZE_ENCODING_CACHE[$encoding_original] = $encoding;

        return $encoding;
    }
4152
4153
    /**
     * Standardize line ending to unix-like.
     *
     * Every "\r\n" and every remaining lone "\r" is replaced with "\n".
     *
     * @param string $str
     *
     * @return string
     */
    public static function normalize_line_ending(string $str): string
    {
        // order matters: "\r\n" has to be replaced before a lone "\r"
        $foreign_line_endings = ["\r\n", "\r"];

        return \str_replace($foreign_line_endings, "\n", $str);
    }
4164
4165
    /**
     * Normalize some MS Word special characters.
     *
     * Thin wrapper: the work is delegated to "ASCII::normalize_msword()".
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @return string
     */
    public static function normalize_msword(string $str): string
    {
        $normalized = ASCII::normalize_msword($str);

        return $normalized;
    }
4176
4177
    /**
     * Normalize the whitespace.
     *
     * Thin wrapper: the work is delegated to "ASCII::normalize_whitespace()".
     *
     * @param string $str                        <p>The string to be normalized.</p>
     * @param bool   $keep_non_breaking_space    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keep_bidi_unicode_controls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                           bidirectional text chars.</p>
     *
     * @return string
     */
    public static function normalize_whitespace(
        string $str,
        bool $keep_non_breaking_space = false,
        bool $keep_bidi_unicode_controls = false
    ): string {
        $normalized = ASCII::normalize_whitespace($str, $keep_non_breaking_space, $keep_bidi_unicode_controls);

        return $normalized;
    }
4198
4199
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * Results are memoized per "character + encoding". The code point is resolved
     * via the bundled "ord" lookup data first, then via "IntlChar" (if supported)
     * and finally by decoding the leading bytes by hand.
     *
     * @param string $chr      <p>The character of which to calculate code point.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             Unicode code point of the given character,<br>
     *             0 on invalid UTF-8 byte sequence
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        static $CHAR_CACHE = [];

        // init
        $chr = (string) $chr;

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $memo_key = $chr . $encoding;
        if (isset($CHAR_CACHE[$memo_key]) === true) {
            return $CHAR_CACHE[$memo_key];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            $CHAR_CACHE[$memo_key] = self::$ORD[$chr];

            return $CHAR_CACHE[$memo_key];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_code = \IntlChar::ord($chr);
            if ($intl_code) {
                $CHAR_CACHE[$memo_key] = $intl_code;

                return $CHAR_CACHE[$memo_key];
            }
        }

        //
        // fallback via vanilla php
        //

        // at most four bytes are looked at; unpack('C*') numbers them starting at 1
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        $lead_byte = $bytes ? $bytes[1] : 0;

        if ($lead_byte >= 0xF0 && isset($bytes[4])) {
            // four-byte sequence
            $code = (int) ((($lead_byte - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        } elseif ($lead_byte >= 0xE0 && isset($bytes[3])) {
            // three-byte sequence
            $code = (int) ((($lead_byte - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        } elseif ($lead_byte >= 0xC0 && isset($bytes[2])) {
            // two-byte sequence
            $code = (int) ((($lead_byte - 0xC0) << 6) + $bytes[2] - 0x80);
        } else {
            // single byte (or nothing to decode)
            $code = $lead_byte;
        }

        $CHAR_CACHE[$memo_key] = $code;

        return $CHAR_CACHE[$memo_key];
    }
4284
4285
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike the native "parse_str()", this method never (re-)places variables
     *          in the current scope; the result is always written into $result.
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $result     <p>The result will be returned into this reference parameter.</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return bool
     *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result
     */
    public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
    {
        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        // without mbstring: fall back to the native parser
        if (self::$SUPPORT['mbstring'] !== true) {
            /** @noinspection PhpVoidFunctionResultUsedInspection */
            \parse_str($str, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($str, $result);

        return $parsed !== false && $result !== [];
    }
4317
4318
    /**
     * Checks if the "u" pattern modifier is available that enables Unicode support in PCRE.
     *
     * The check runs an empty pattern with the "u" modifier against an empty string;
     * warnings of that probe are suppressed on purpose.
     *
     * @return bool
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     */
    public static function pcre_utf8_support(): bool
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $probe_result = @\preg_match('//u', '');

        return (bool) $probe_result;
    }
4330
4331
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * Both boundaries are interpreted the same way: as decimal code points if both
     * consist of digits only, as hexadecimal code points if both consist of hex digits
     * only (ctype mode), as numbers via "is_numeric" (non-ctype mode), otherwise as
     * characters whose code point is looked up via "UTF8::ord()".
     *
     * @param mixed     $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param mixed     $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     * @param bool      $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple "is_numeric"</p>
     * @param string    $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param float|int $step      [optional] <p>
     *                             If a step value is given, it will be used as the
     *                             increment between elements in the sequence. step
     *                             should be given as a positive number. If not specified,
     *                             step will default to 1.
     *                             </p>
     *
     * @throws \InvalidArgumentException if $step is not numeric or not positive
     * @throws \RuntimeException         if $use_ctype is set but "ext-ctype" is not available
     *
     * @return string[] empty if a boundary is empty or resolves to code point 0
     */
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            // $step is untyped, so callers can still pass a non-number here
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        $is_digit = false;
        $is_xdigit = false;

        // ctype_*() must get strings: integers from 0 to 255 would be read as ASCII codes,
        // and non-string arguments are deprecated since PHP 8.1
        /** @noinspection PhpComposerExtensionStubsInspection */
        if ($use_ctype && \ctype_digit((string) $var1) && \ctype_digit((string) $var2)) {
            $is_digit = true;
            $start = (int) $var1;
        } /** @noinspection PhpComposerExtensionStubsInspection */ elseif ($use_ctype && \ctype_xdigit((string) $var1) && \ctype_xdigit((string) $var2)) {
            $is_xdigit = true;
            $start = (int) self::hex_to_int((string) $var1);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            $start = self::ord($var1);
        }

        if (!$start) {
            return [];
        }

        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int((string) $var2);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord($var2);
        }

        if (!$end) {
            return [];
        }

        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
4413
4414
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @return string
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // Without any of these markers there is nothing to decode,
        // so only the simple UTF-8 fix is applied.
        $has_marker = false;
        foreach (['&', '%', '+', '\u'] as $marker) {
            if (\strpos($str, $marker) !== false) {
                $has_marker = true;

                break;
            }
        }

        if ($has_marker === false) {
            return self::fix_simple_utf8($str);
        }

        $current = self::urldecode_unicode_helper($str);

        // One pass: to UTF-8 -> html entities -> percent-decoding -> simple UTF-8 fix.
        // With $multi_decode the pass is repeated until the string stops changing.
        do {
            $previous = $current;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $without_entities = self::html_entity_decode(
                self::to_utf8($current),
                \ENT_QUOTES | \ENT_HTML5
            );
            $current = self::fix_simple_utf8(\rawurldecode($without_entities));
        } while ($multi_decode === true && $previous !== $current);

        return $current;
    }
4471
4472
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is wrapped in $delimiter and always gets the "u" modifier,
     * followed by the given $options.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern.</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used. The value "msr" is mapped to "ms".</p>
     * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback for an empty delimiter
        $boundary = $delimiter ?: '/';

        $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
4505
4506
    /**
     * Alias of "UTF8::remove_bom()": forwards the string unchanged and returns the result.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *
     * @see UTF8::remove_bom()
     * @deprecated <p>please use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $without_bom = self::remove_bom($str);

        return $without_bom;
    }
4520
4521
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of the BOM table is tested against the start of the (already
     * shortened) string, so several leading BOMs are stripped one after another.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string without UTF-BOM
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remaining_bytes = \strlen($str);
        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            if (\strpos($str, $bom_string, 0) !== 0) {
                continue;
            }

            $stripped = \substr($str, $bom_byte_length, $remaining_bytes);
            if ($stripped === false) {
                // nothing is left behind the BOM
                return '';
            }

            $remaining_bytes -= (int) $bom_byte_length;

            $str = (string) $stripped;
        }

        return $str;
    }
4550
4551
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Every run of directly repeated $what (e.g. "  " for " ") is collapsed
     * into a single occurrence.
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @return string the result string with removed duplicates
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what) === true) {
            $what = [$what];
        }

        if (\is_array($what) === true) {
            /** @noinspection ForeachSourceInspection */
            foreach ($what as $item) {
                $item = (string) $item;
                if ($item === '') {
                    // an empty needle has no duplicates
                    continue;
                }

                // Replace with the captured group instead of the raw $item, so
                // that needles like '$0' or '\1' are not treated as back-references.
                $str = (string) \preg_replace(
                    '/(' . \preg_quote($item, '/') . ')+/u',
                    '$1',
                    $str
                );
            }
        }

        return $str;
    }
4574
4575
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str
     * @param string $allowable_tags [optional] <p>You can use the optional second parameter to specify tags which should
     *                               not be stripped. Default: '' (strip all tags)
     *                               </p>
     *
     * @return string
     */
    public static function remove_html(string $str, string $allowable_tags = ''): string
    {
        $without_tags = \strip_tags($str, $allowable_tags);

        return $without_tags;
    }
4589
4590
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * A "\r\n" pair counts as one break, so it is replaced by a single $replacement.
     *
     * @param string $str
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @return string
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // "\r\n" must be the first alternative, otherwise "\r" and "\n" would match separately.
        return (string) \preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
    }
4602
4603
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * The work is delegated to "ASCII::remove_invisible_characters()", all
     * arguments are passed through unchanged.
     *
     * @param string $str
     * @param bool   $url_encoded
     * @param string $replacement
     *
     * @return string
     */
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = true,
        string $replacement = ''
    ): string {
        $cleaned = ASCII::remove_invisible_characters($str, $url_encoded, $replacement);

        return $cleaned;
    }
4627
4628
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * The prefix check is a byte-wise comparison; an empty $substring never matches.
     *
     * @param string $str
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string without the prefix $substring
     */
    public static function remove_left(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Compare against '' explicitly: a truthiness check would ignore the prefix "0".
        if ($substring === '' || \strpos($str, $substring) !== 0) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
4662
4663
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * The suffix check is a byte-wise comparison; an empty $substring never matches.
     *
     * @param string $str
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string having a $str without the suffix $substring
     */
    public static function remove_right(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Compare against '' explicitly: a truthiness check would ignore the suffix "0".
        if ($substring === '' || \substr($str, -\strlen($substring)) !== $substring) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
4698
4699
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * Uses the native "str_replace()" for the case-sensitive mode and
     * "UTF8::str_ireplace()" otherwise.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $search         <p>The needle to search for.</p>
     * @param string $replacement    <p>The string to replace with.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $case_sensitive = true
    ): string {
        return $case_sensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4721
4722
    /**
     * Replaces all occurrences of the $search elements in $str by $replacement.
     *
     * Uses the native "str_replace()" for the case-sensitive mode and
     * "UTF8::str_ireplace()" otherwise.
     *
     * @param string       $str            <p>The input string.</p>
     * @param array        $search         <p>The elements to search for.</p>
     * @param array|string $replacement    <p>The string to replace with.</p>
     * @param bool         $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $case_sensitive = true
    ): string {
        if (!$case_sensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
4744
4745
    /**
     * Replace the diamond question mark (U+FFFD) and invalid-UTF8 chars with the replacement.
     *
     * @param string $str                        <p>The input string</p>
     * @param string $replacement_char           <p>The replacement character, an empty string removes the chars.</p>
     * @param bool   $process_invalid_utf8_chars <p>Convert invalid UTF-8 chars </p>
     *
     * @return string
     */
    public static function replace_diamond_question_mark(
        string $str,
        string $replacement_char = '',
        bool $process_invalid_utf8_chars = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($process_invalid_utf8_chars === true) {
            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            // "mb_substitute_character()" only accepts a code point or a keyword,
            // never an arbitrary string. Invalid sequences are therefore either
            // dropped ("none") or marked as U+FFFD, which is mapped to the real
            // replacement by the "str_replace()" below.
            $substitute = $replacement_char === '' ? 'none' : 0xFFFD;

            $save = \mb_substitute_character();
            \mb_substitute_character($substitute);

            try {
                // the polyfill maybe return false, so cast to string
                $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            } finally {
                // always restore the global mbstring setting
                \mb_substitute_character($save);
            }
        }

        return \str_replace("\xEF\xBF\xBD", $replacement_char, $str);
    }
4794
4795
    /**
     * Strip whitespace or other characters from end of a UTF-8 string.
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped, whitespace is used for null or an empty string.</p>
     *
     * @return string the string with unwanted characters stripped from the right
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Compare explicitly: a truthiness check would ignore the character list "0".
        if ($chars !== null && $chars !== '') {
            $chars = \preg_quote($chars, '/');
            $pattern = "[{$chars}]+$";
        } else {
            $pattern = '[\\s]+$';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
4823
4824
    /**
     * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
     *
     * Echoes one "key - value" line per support entry, wrapped in a "<pre>" element.
     *
     * @psalm-suppress MissingReturnType
     */
    public static function showSupport()
    {
        $lines = '';
        foreach (self::$SUPPORT as $key => $value) {
            $lines .= $key . ' - ' . \print_r($value, true) . "\n<br>";
        }

        echo '<pre>' . $lines . '</pre>';
    }
4838
4839
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the HTML numbered entity
     */
    public static function single_chr_html_encode(
        string $char,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($char === '') {
            return '';
        }

        // ASCII input is returned untouched when the caller asked for it
        $is_kept_ascii = $keep_ascii_chars === true && ASCII::is_ascii($char) === true;
        if ($is_kept_ascii) {
            return $char;
        }

        $code_point = self::ord($char, $encoding);

        return '&#' . $code_point . ';';
    }
4867
4868
    /**
     * Replaces every run of $tab_length spaces by one tab character.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length <p>Number of spaces that represent one tab.</p>
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tab_length = 4): string
    {
        $spaces = \str_repeat(' ', $tab_length);

        return \str_replace($spaces, "\t", $str);
    }
4886
4887
    /**
     * Alias of "UTF8::str_split()": all arguments are passed through unchanged.
     *
     * @param string|string[] $str
     * @param int             $length
     * @param bool            $clean_utf8
     *
     * @return string[]
     *
     * @see UTF8::str_split()
     */
    public static function split(
        $str,
        int $length = 1,
        bool $clean_utf8 = false
    ): array {
        $chunks = self::str_split($str, $length, $clean_utf8);

        return $chunks;
    }
4905
4906
    /**
     * Alias of "UTF8::str_starts_with()": both arguments are passed through unchanged.
     *
     * @param string $haystack
     * @param string $needle
     *
     * @return bool
     *
     * @see UTF8::str_starts_with()
     */
    public static function str_begins(string $haystack, string $needle): bool
    {
        $starts_with_needle = self::str_starts_with($haystack, $needle);

        return $starts_with_needle;
    }
4920
4921
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = self::lcfirst(
            \trim($str),
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        );

        // leading dashes / underscores never start a new word
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // the native "mb_strtoupper()" is only used if no special handling was requested
        $use_mb_functions = $lang === null && $try_to_keep_the_string_length === false;

        /**
         * Upper-case helper shared by both replacement steps.
         *
         * @param string $chunk
         * @param bool   $clean
         *
         * @return string
         */
        $to_upper = static function (string $chunk, bool $clean) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
            if ($use_mb_functions !== true) {
                return self::strtoupper($chunk, $encoding, $clean, $lang, $try_to_keep_the_string_length);
            }

            return $encoding === 'UTF-8'
                ? \mb_strtoupper($chunk)
                : \mb_strtoupper($chunk, $encoding);
        };

        // step 1: drop the separators and upper-case the character that follows them
        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($to_upper): string {
                return isset($match[1]) ? $to_upper($match[1], false) : '';
            },
            $str
        );

        // step 2: upper-case every run of numbers together with the character that follows it
        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($to_upper, $clean_utf8): string {
                return $to_upper($match[0], $clean_utf8);
            },
            $str
        );
    }
5006
5007
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * The whitespace is collapsed first, then the helper is applied for
     * space separated parts and afterwards for dash separated parts.
     *
     * @param string $str
     *
     * @return string string with $str capitalized
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);
        $by_space = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($by_space, '-');
    }
5025
5026
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $case_sensitive to false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param string $needle         <p>Substring to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool whether or not $haystack contains $needle
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $case_sensitive = true
    ): bool {
        $position = $case_sensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        return $position !== false;
    }
5048
5049
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * An empty haystack, an empty needle list or an empty needle results in false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool whether or not $haystack contains all $needles
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // Compare against '' explicitly, so that the needle "0" is still searched.
            if ($needle === '') {
                return false;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            // Only a missing needle ends the loop early, every needle must be checked.
            if ($position === false) {
                return false;
            }
        }

        return true;
    }
5084
5085
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * Empty needles are skipped; an empty haystack or an empty needle list results in false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool
     *              Whether or not $haystack contains any of the $needles
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // Compare against '' explicitly, so that the needle "0" is still searched.
            if ($needle === '') {
                continue;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
5127
5128
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * Shortcut for "UTF8::str_delimit()" with "-" as delimiter.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $dasherized = self::str_delimit($str, '-', $encoding);

        return $dasherized;
    }
5142
5143
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // "mb_ereg_replace()" is used with mbstring support, "preg_replace()" otherwise
        $has_mbstring = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // step 1: put a dash in front of every uppercase character inside of a word
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
        } else {
            $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
        }

        // step 2: lowercase, natively if no special handling was requested
        $use_native_lowercase = $lang === null
                                && $try_to_keep_the_string_length === false
                                && $encoding === 'UTF-8';
        if ($use_native_lowercase) {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        // step 3: collapse dashes, underscores and whitespace into the delimiter
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }
5194
5195
    /**
     * Detect the encoding of a string; an optimized "mb_detect_encoding()" that can
     * additionally report UTF-16 and UTF-32.
     *
     * The checks run in a fixed order and the first hit wins:
     * binary (UTF-32, then UTF-16) -> ASCII -> UTF-8 -> "mb_detect_encoding()" -> "iconv()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise false, e.g. for BINARY data or when nothing matched.
     */
    public static function str_detect_encoding($str)
    {
        $input = (string) $str;

        //
        // 1.) binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        if (self::is_binary($input, true) === true) {
            // result code of the is_utf32() / is_utf16() helpers => encoding name
            $utf32_names = [1 => 'UTF-32LE', 2 => 'UTF-32BE'];
            $utf16_names = [1 => 'UTF-16LE', 2 => 'UTF-16BE'];

            $utf32_result = self::is_utf32($input, false);
            if (\is_int($utf32_result) && isset($utf32_names[$utf32_result])) {
                return $utf32_names[$utf32_result];
            }

            $utf16_result = self::is_utf16($input, false);
            if (\is_int($utf16_result) && isset($utf16_names[$utf16_result])) {
                return $utf16_names[$utf16_result];
            }

            // binary, but neither "UTF-16" nor "UTF-32"
            return false;
        }

        //
        // 2.) plain ASCII
        //

        if (ASCII::is_ascii($input) === true) {
            return 'ASCII';
        }

        //
        // 3.) UTF-8
        //

        if (self::is_utf8_string($input) === true) {
            return 'UTF-8';
        }

        //
        // 4.) "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            $candidate_encodings = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($input, $candidate_encodings, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) "iconv()": the first encoding that round-trips the input unchanged wins
        //

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $round_trip = (string) @\iconv($candidate, $candidate . '//IGNORE', $input);
            if ($round_trip === $input) {
                return $candidate;
            }
        }

        return false;
    }
5316
5317
    /**
     * Alias for "UTF8::str_ends_with()".
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     *
     * @see UTF8::str_ends_with()
     */
    public static function str_ends(string $haystack, string $needle): bool
    {
        $ends_with_needle = self::str_ends_with($haystack, $needle);

        return $ends_with_needle;
    }
5331
5332
    /**
     * Check if the string ends with the given substring (byte-wise, case-sensitive).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        $needle_bytes = \strlen($needle);

        // every string, the empty one included, ends with the empty string
        if ($needle_bytes === 0) {
            return true;
        }

        // a needle longer than the haystack (this covers the empty haystack) can't be a suffix
        if ($needle_bytes > \strlen($haystack)) {
            return false;
        }

        $tail = \substr($haystack, -$needle_bytes);

        return $tail === $needle;
    }
5352
5353
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty list of substrings never matches
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with one of the $substrings
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        // an empty list simply skips the loop and ends up at "return false"
        foreach ($substrings as $candidate) {
            $tail = \substr($str, -\strlen($candidate));

            if ($tail === $candidate) {
                return true;
            }
        }

        return false;
    }
5377
5378
    /**
     * Ensures that the string begins with $substring. If it doesn't, it's
     * prepended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        // byte-wise prefix check; an empty $substring never counts as "already present"
        $already_prefixed = $substring !== ''
                            &&
                            \strncmp($str, $substring, \strlen($substring)) === 0;

        return $already_prefixed ? $str : $substring . $str;
    }
5399
5400
    /**
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        // nothing to do when the (non-empty) string already ends with the (non-empty) substring
        if (
            $str !== ''
            &&
            $substring !== ''
            &&
            \substr($str, -\strlen($substring)) === $substring
        ) {
            return $str;
        }

        return $str . $substring;
    }
5422
5423
    /**
     * Capitalizes the first word of the string, replaces underscores with
     * spaces, and strips every occurrence of '_id'.
     *
     * @param string $str
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        // order matters: '_id' has to go before the remaining underscores become spaces
        $without_id = \str_replace('_id', '', $str);
        $with_spaces = \str_replace('_', ' ', $without_id);

        return self::ucfirst(\trim($with_spaces));
    }
5447
5448
    /**
     * Alias for "UTF8::str_istarts_with()".
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     *
     * @see UTF8::str_istarts_with()
     */
    public static function str_ibegins(string $haystack, string $needle): bool
    {
        $starts_with_needle = self::str_istarts_with($haystack, $needle);

        return $starts_with_needle;
    }
5462
5463
    /**
     * Alias for "UTF8::str_iends_with()".
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     *
     * @see UTF8::str_iends_with()
     */
    public static function str_iends(string $haystack, string $needle): bool
    {
        $ends_with_needle = self::str_iends_with($haystack, $needle);

        return $ends_with_needle;
    }
5477
5478
    /**
     * Check if the string ends with the given substring, case insensitive.
     *
     * INFO: the tail of $haystack is cut by the byte-length of $needle before
     *       both are compared via "UTF8::strcasecmp()".
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        $needle_bytes = \strlen($needle);

        // the empty needle matches everything
        if ($needle_bytes === 0) {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        $tail = \substr($haystack, -$needle_bytes);

        return self::strcasecmp($tail, $needle) === 0;
    }
5498
5499
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive
     * - an empty list of substrings never matches
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with one of the $substrings
     *
     * @see UTF8::str_iends_with()
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // an empty list simply skips the loop and ends up at "return false"
        foreach ($substrings as $candidate) {
            if (self::str_iends_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
5523
5524
    /**
     * Returns the index of the first occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search.
     *
     * - case-insensitive, delegates to "UTF8::stripos()"
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $first_index = self::stripos($str, $needle, $offset, $encoding);

        return $first_index;
    }
5550
5551
    /**
     * Returns the index of the last occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. Offsets may be negative to count from the last character
     * in the string.
     *
     * - case-insensitive, delegates to "UTF8::strripos()"
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $last_index = self::strripos($str, $needle, $offset, $encoding);

        return $last_index;
    }
5578
5579
    /**
     * Returns the index of the first occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search.
     *
     * - case-sensitive, delegates to "UTF8::strpos()"
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $first_index = self::strpos($str, $needle, $offset, $encoding);

        return $first_index;
    }
5605
5606
    /**
     * Returns the index of the last occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. Offsets may be negative to count from the last character
     * in the string.
     *
     * - case-sensitive, delegates to "UTF8::strrpos()"
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $last_index = self::strrpos($str, $needle, $offset, $encoding);

        return $last_index;
    }
5633
5634
    /**
     * Inserts $substring into the string at the $index provided.
     *
     * The string is returned unchanged when $index lies behind its end.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $length = (int) self::strlen($str, $encoding);
            if ($index > $length) {
                return $str;
            }

            $head = (string) self::substr($str, 0, $index, $encoding);
            // $length as the "length" argument just means "everything up to the end"
            $tail = (string) self::substr($str, $index, $length, $encoding);

            return $head . $substring . $tail;
        }

        // fast path for UTF-8: use the "mb_" functions directly
        $length = (int) \mb_strlen($str);
        if ($index > $length) {
            return $str;
        }

        $head = (string) \mb_substr($str, 0, $index);
        $tail = (string) \mb_substr($str, $index, $length);

        return $head . $substring . $tail;
    }
5673
5674
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * Every search term is turned into a quoted, case-insensitive unicode regex
     * and the replacement is escaped, so it is always inserted literally:
     * "$1" or "\1" in a replacement are plain text, not regex back-references.
     *
     * @see http://php.net/manual/en/function.str-ireplace.php
     *
     * @param mixed $search  <p>
     *                       The value(s) to search for. Every replacement with
     *                       a search array is performed on the result of the
     *                       previous replacement. An empty search term never matches.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value(s), a string or an array
     *                       that is paired with the search array by position.
     *                       </p>
     * @param mixed $subject <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] <p>
     *                       The number of matched and replaced needles will
     *                       be returned in count which is passed by
     *                       reference.
     *                       </p>
     *
     * @return mixed a string or an array of replacements
     */
    public static function str_ireplace($search, $replace, $subject, &$count = null)
    {
        // one pattern per search term, in the original order
        $patterns = [];
        foreach ((array) $search as $term) {
            $term = (string) $term;

            // "^(?<=.)$" can never match (it needs a char in front of the subject start),
            // which makes the empty needle a no-op like in the native function
            $patterns[] = $term === ''
                ? '/^(?<=.)$/'
                : '/' . \preg_quote($term, '/') . '/ui';
        }

        // "preg_replace()" interprets "\" and "$" in replacements -> escape them,
        // otherwise e.g. "$1" would be replaced by a (non-existing) capture group
        $escape = static function ($value): string {
            return \addcslashes((string) $value, '\\$');
        };
        $replacements = \is_array($replace)
            ? \array_map($escape, $replace)
            : $escape($replace);

        $result = \preg_replace($patterns, $replacements, $subject, -1, $replaced_total);
        $count = $replaced_total; // reference parameter

        return $result;
    }
5718
5719
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * - case-insensitive, compared character-wise as UTF-8 (so e.g. "Ä" matches "ä")
     * - an empty $search appends $replacement to the string
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        // an empty search has always been answered with "$str . $replacement"
        // (this also covers the empty input string)
        if ($search === '') {
            return $str . $replacement;
        }

        if ($str === '') {
            return '';
        }

        // only compare the leading chars instead of scanning the whole string
        $search_length = (int) \mb_strlen($search, 'UTF-8');
        $head = (string) \mb_substr($str, 0, $search_length, 'UTF-8');

        if (\mb_stripos($head, $search, 0, 'UTF-8') === 0) {
            return $replacement . (string) \mb_substr($str, $search_length, null, 'UTF-8');
        }

        return $str;
    }
5750
5751
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * - case-insensitive, compared character-wise as UTF-8 (so e.g. "Ä" matches "ä")
     * - an empty $search appends $replacement to the string
     * - a $search that is longer than the string never matches
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        // an empty search has always been answered with "$str . $replacement"
        // (this also covers the empty input string)
        if ($search === '') {
            return $str . $replacement;
        }

        // guard: a longer search would otherwise produce an out-of-range offset
        $search_length = (int) \mb_strlen($search, 'UTF-8');
        if ($str === '' || $search_length > (int) \mb_strlen($str, 'UTF-8')) {
            return $str;
        }

        // only compare the trailing chars instead of scanning the whole string
        $tail = (string) \mb_substr($str, -$search_length, null, 'UTF-8');

        if (\mb_stripos($tail, $search, 0, 'UTF-8') === 0) {
            return (string) \mb_substr($str, 0, -$search_length, 'UTF-8') . $replacement;
        }

        return $str;
    }
5782
5783
    /**
     * Check if the string starts with the given substring, case insensitive.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        // the empty needle matches everything, even the empty haystack
        if ($needle === '') {
            return true;
        }

        return $haystack !== ''
               &&
               self::stripos($haystack, $needle) === 0;
    }
5803
5804
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive
     * - an empty input string or an empty list of substrings never matches
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with one of the $substrings
     *
     * @see UTF8::str_istarts_with()
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_istarts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
5832
5833
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * - case-insensitive
     * - returns an empty string if the input or the separator is empty,
     *   or if the separator was not found
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // search with the same encoding that is used to cut the string below,
        // otherwise the offset does not fit for non UTF-8 input
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
5870
5871
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * - case-insensitive
     * - returns an empty string if the input or the separator is empty,
     *   or if the separator was not found
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // search with the same encoding that is used to cut the string below,
        // otherwise the offset does not fit for non UTF-8 input
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
5908
5909
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * - case-insensitive
     * - returns an empty string if the input or the separator is empty,
     *   or if the separator was not found
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // search with the same encoding that is used to cut the string below,
        // otherwise the offset does not fit for non UTF-8 input
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
5938
5939
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * - case-insensitive
     * - returns an empty string if the input or the separator is empty,
     *   or if the separator was not found
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // UTF-8 takes the direct "mb_" route, everything else goes through the class helpers
        $is_utf8 = $encoding === 'UTF-8';

        $offset = $is_utf8
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);

        if ($offset === false) {
            return '';
        }

        return $is_utf8
            ? (string) \mb_substr($str, 0, $offset)
            : (string) self::substr($str, 0, $offset, $encoding);
    }
5973
5974
    /**
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
     *
     * - delegates to "UTF8::stristr()"
     * - returns an empty string if the input or the needle is empty,
     *   or if "UTF8::stristr()" returns false
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        $found = self::stristr($str, $needle, $before_needle, $encoding);

        return $found === false ? '' : $found;
    }
6010
6011
    /**
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
     *
     * - delegates to "UTF8::strrichr()"
     * - returns an empty string if the input or the needle is empty,
     *   or if "UTF8::strrichr()" returns false
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        $found = self::strrichr($str, $needle, $before_needle, $encoding);

        return $found === false ? '' : $found;
    }
6047
6048
    /**
     * Returns the last $n characters of the string.
     *
     * An empty input string or a non-positive $n results in an empty string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_last_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        if ($n <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $normalized_encoding);
        }

        // fast path for UTF-8: a negative start counts from the end of the string
        return (string) \mb_substr($str, -$n);
    }
6074
6075
    /**
     * Limit the number of characters in a string.
     *
     * A string longer than $length is cut so that the kept part plus $str_add_on
     * is $length characters long. If $str_add_on alone is at least as long as
     * $length, nothing of the string is kept and only $str_add_on is returned.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // never go below zero: a negative length would cut from the END of the string
            // and could return more characters than the input had
            $keep = \max(0, $length - (int) \mb_strlen($str_add_on));

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $keep) . $str_add_on;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // measure the add-on in the same encoding as the string itself
        $keep = \max(0, $length - (int) self::strlen($str_add_on, $encoding));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
    }
6112
6113
    /**
     * Limit the number of characters in a string, but cut at a word boundary (space).
     *
     * - a string that is not longer than $length is returned unchanged
     * - if the char at position "$length - 1" is a space, the string is cut right before it
     * - otherwise the string is cut at $length and the last (possibly incomplete) word is dropped
     * - if no space is left at all, the first "$length - 1" chars are used
     *
     * $str_add_on is appended to every shortened result.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            /** @noinspection UnnecessaryCastingInspection */
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            if (\mb_substr($str, $length - 1, 1) === ' ') {
                return ((string) \mb_substr($str, 0, $length - 1)) . $str_add_on;
            }

            $truncated = \mb_substr($str, 0, $length);

            // drop everything from the last space on (= the last, maybe incomplete, word)
            $last_space = \strrpos($truncated, ' ');
            $without_last_word = $last_space === false ? '' : \substr($truncated, 0, $last_space);

            if ($without_last_word === '') {
                return ((string) \mb_substr($truncated, 0, $length - 1)) . $str_add_on;
            }

            return $without_last_word . $str_add_on;
        }

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
            return ((string) self::substr($str, 0, $length - 1, $encoding)) . $str_add_on;
        }

        $truncated = self::substr($str, 0, $length, $encoding);
        if ($truncated === false) {
            return '' . $str_add_on;
        }

        // drop everything from the last space on (= the last, maybe incomplete, word)
        $last_space = \strrpos($truncated, ' ');
        $without_last_word = $last_space === false ? '' : \substr($truncated, 0, $last_space);

        if ($without_last_word === '') {
            return ((string) self::substr($truncated, 0, $length - 1, $encoding)) . $str_add_on;
        }

        return $without_last_word . $str_add_on;
    }
6179
6180
    /**
     * Returns the longest run of leading characters that $str1 and $str2 have in common.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string <p>The shared prefix, or an empty string if the first characters already differ.</p>
     */
    public static function str_longest_common_prefix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        $prefix = '';

        if ($encoding === 'UTF-8') {
            // Only positions that exist in both strings can match.
            $limit = (int) \min(\mb_strlen($str1), \mb_strlen($str2));

            $pos = 0;
            while ($pos < $limit) {
                $current = \mb_substr($str1, $pos, 1);
                if ($current === false || $current !== \mb_substr($str2, $pos, 1)) {
                    break;
                }

                $prefix .= $current;
                ++$pos;
            }

            return $prefix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        $pos = 0;
        while ($pos < $limit) {
            $current = self::substr($str1, $pos, 1, $encoding);
            if ($current === false || $current !== self::substr($str2, $pos, 1, $encoding)) {
                break;
            }

            $prefix .= $current;
            ++$pos;
        }

        return $prefix;
    }
6241
6242
    /**
     * Returns the longest common substring between the $str1 and $str2.
     * In the case of ties, it returns that which occurs first.
     *
     * <p>Uses the classic dynamic-programming approach
     * (http://en.wikipedia.org/wiki/Longest_common_substring_problem). The characters of both
     * strings are extracted only once and only two table rows are kept in memory.</p>
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string <p>The longest common substring, or an empty string if there is none.</p>
     */
    public static function str_longest_common_substring(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str1);
            $other_length = (int) \mb_strlen($str2);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str_length = (int) self::strlen($str1, $encoding);
            $other_length = (int) self::strlen($str2, $encoding);
        }

        // Return if either string is empty
        if ($str_length === 0 || $other_length === 0) {
            return '';
        }

        // An alias such as "utf8" is normalized to "UTF-8" above and then takes the mbstring path as well.
        $use_mb = $encoding === 'UTF-8';

        // Extract the characters of $str2 once instead of once per cell of the table (1-based like the table).
        $other_chars = [];
        for ($j = 1; $j <= $other_length; ++$j) {
            $other_chars[$j] = $use_mb
                ? \mb_substr($str2, $j - 1, 1)
                : self::substr($str2, $j - 1, 1, $encoding);
        }

        $len = 0; // length of the best match found so far
        $end = 0; // 1-based position in $str1 where that match ends

        // Each row only depends on the row above it, so two rows are enough.
        $previous_row = \array_fill(0, $other_length + 1, 0);

        for ($i = 1; $i <= $str_length; ++$i) {
            // Loop-invariant for the inner loop: look the character up once per row.
            $str_char = $use_mb
                ? \mb_substr($str1, $i - 1, 1)
                : self::substr($str1, $i - 1, 1, $encoding);

            $current_row = [0 => 0];
            for ($j = 1; $j <= $other_length; ++$j) {
                if ($str_char === $other_chars[$j]) {
                    $run = $previous_row[$j - 1] + 1;
                    $current_row[$j] = $run;

                    // Strictly greater: on ties the first occurrence wins.
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $current_row[$j] = 0;
                }
            }

            $previous_row = $current_row;
        }

        if ($use_mb) {
            return (string) \mb_substr($str1, $end - $len, $len);
        }

        return (string) self::substr($str1, $end - $len, $len, $encoding);
    }
6329
6330
    /**
     * Returns the longest run of trailing characters that $str1 and $str2 have in common.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string <p>The shared suffix, or an empty string if there is none or one input is empty.</p>
     */
    public static function str_longest_common_suffix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        $suffix = '';

        if ($encoding === 'UTF-8') {
            $limit = (int) \min(
                \mb_strlen($str1, $encoding),
                \mb_strlen($str2, $encoding)
            );

            // Walk backwards from the last character (negative offsets count from the end).
            for ($back = 1; $back <= $limit; ++$back) {
                $current = \mb_substr($str1, -$back, 1);
                if ($current === false || $current !== \mb_substr($str2, -$back, 1)) {
                    break;
                }

                $suffix = $current . $suffix;
            }

            return $suffix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        for ($back = 1; $back <= $limit; ++$back) {
            $current = self::substr($str1, -$back, 1, $encoding);
            if ($current === false || $current !== self::substr($str2, -$back, 1, $encoding)) {
                break;
            }

            $suffix = $current . $suffix;
        }

        return $suffix;
    }
6394
6395
    /**
     * Returns true if $str matches the supplied pattern, false otherwise.
     *
     * <p>The pattern is wrapped in "/" delimiters and evaluated with the "u" modifier, so it must be
     * given without delimiters and any literal "/" inside it has to be escaped by the caller.</p>
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against.</p>
     *
     * @return bool whether or not $str matches the pattern
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        $regex = '/' . $pattern . '/u';

        // preg_match() yields 1 on a match, 0 on no match and false on error.
        return \preg_match($regex, $str) === 1;
    }
6407
6408
    /**
     * Checks whether a character exists at the given offset. Negative offsets
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool whether or not the index exists
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $char_count = (int) self::strlen($str, $encoding);

        // -1 addresses the last character, so a negative offset may reach back as far as -$char_count.
        if ($offset < 0) {
            return \abs($offset) <= $char_count;
        }

        return $offset < $char_count;
    }
6430
6431
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string the character at the specified index
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Measure the string in the same encoding that char_at() uses below, otherwise the
        // bounds check and the lookup can disagree for non-default encodings.
        $length = (int) self::strlen($str, $encoding);

        if (
            ($index >= 0 && $length <= $index)
            ||
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6460
6461
    /**
     * Pad a UTF-8 string to given length with another string.
     *
     * <p>The input is returned unchanged when $pad_length is 0, $pad_string is empty or the input
     * already has more than $pad_length characters.</p>
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of "left", "right", "both"
     *
     * @return string returns the padded string
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        // Translate the string aliases into the native STR_PAD_* constants.
        if ($pad_type !== (int) $pad_type) {
            $named_types = [
                'left'  => \STR_PAD_LEFT,
                'right' => \STR_PAD_RIGHT,
                'both'  => \STR_PAD_BOTH,
            ];

            if (!\is_string($pad_type) || !isset($named_types[$pad_type])) {
                throw new \InvalidArgumentException(
                    'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }

            $pad_type = $named_types[$pad_type];
        }

        // The UTF-8 fast path uses mbstring directly, every other charset goes through the class helpers.
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str_length = $is_utf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        if ($pad_length < $str_length) {
            return $str;
        }

        // Number of characters that have to be added in total.
        $missing = $pad_length - $str_length;

        // Cuts a repeated pad string down to exactly the wanted number of characters.
        $take = static function (string $filler, int $chars) use ($is_utf8, $encoding): string {
            if ($is_utf8) {
                return (string) \mb_substr($filler, 0, $chars);
            }

            return (string) self::substr($filler, 0, $chars, $encoding);
        };

        if ($pad_type === \STR_PAD_BOTH) {
            // With an odd number of missing characters the extra one goes to the right side.
            $left_chars = (int) \floor($missing / 2);
            $right_chars = (int) \ceil($missing / 2);

            $pre = $take(\str_repeat($pad_string, $left_chars), $left_chars);
            $post = $take(\str_repeat($pad_string, $right_chars), $right_chars);

            return $pre . $str . $post;
        }

        $pad_string_length = $is_utf8
            ? (int) \mb_strlen($pad_string)
            : (int) self::strlen($pad_string, $encoding);

        // Repeat the pad string often enough to cover the gap, then trim the overhang.
        $filler = $take(
            \str_repeat($pad_string, (int) \ceil($missing / $pad_string_length)),
            $missing
        );

        if ($pad_type === \STR_PAD_LEFT) {
            return $filler . $str;
        }

        // STR_PAD_RIGHT and every unknown integer value pad on the right.
        return $str . $filler;
    }
6623
6624
    /**
     * Pads both sides of the string so that it reaches the given length.
     * Shortcut for str_pad() with a $pad_type of STR_PAD_BOTH.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with padding applied
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $pad_str, \STR_PAD_BOTH, $encoding);

        return $padded;
    }
6649
6650
    /**
     * Pads the beginning of the string so that it reaches the given length.
     * Shortcut for str_pad() with a $pad_type of STR_PAD_LEFT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with left padding
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $pad_str, \STR_PAD_LEFT, $encoding);

        return $padded;
    }
6675
6676
    /**
     * Pads the end of the string so that it reaches the given length.
     * Shortcut for str_pad() with a $pad_type of STR_PAD_RIGHT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with right padding
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $pad_str, \STR_PAD_RIGHT, $encoding);

        return $padded;
    }
6701
6702
    /**
     * Repeat a string.
     *
     * <p>The input is passed through self::filter() before it is repeated.</p>
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of time the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @return string the repeated string
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        $filtered = self::filter($str);

        return \str_repeat($filtered, $multiplier);
    }
6726
6727
    /**
     * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param mixed $search  <p>
     *                       The value being searched for, otherwise known as the needle.
     *                       An array may be used to designate multiple needles.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value that replaces found search
     *                       values. An array may be used to designate multiple replacements.
     *                       </p>
     * @param mixed $subject <p>
     *                       The string or array being searched and replaced on,
     *                       otherwise known as the haystack.
     *                       </p>
     *                       <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles
     *
     * @return mixed this function returns a string or an array with the replaced values
     */
    public static function str_replace($search, $replace, $subject, int &$count = null)
    {
        // $count is forwarded by reference, so the native function fills in the caller's variable.
        /**
         * @psalm-suppress PossiblyNullArgument
         */
        $result = \str_replace($search, $replace, $subject, $count);

        return $result;
    }
6772
6773
    /**
     * Replaces $search at the beginning of the string with $replacement.
     *
     * <p>Nothing is replaced if $str does not start with $search. With an empty $search the
     * replacement is appended to $str.</p>
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_beginning(
        string $str,
        string $search,
        string $replacement
    ): string {
        if ($str === '' && $replacement === '') {
            return '';
        }

        if ($search === '') {
            // For an empty $str this is just $replacement.
            return $str . $replacement;
        }

        // Byte-wise prefix comparison; this only looks at the first strlen($search) bytes.
        $search_bytes = \strlen($search);
        if (\strncmp($str, $search, $search_bytes) !== 0) {
            return $str;
        }

        return $replacement . \substr($str, $search_bytes);
    }
6807
6808
    /**
     * Replaces $search at the ending of the string with $replacement.
     *
     * <p>Nothing is replaced if $str does not end with $search (this includes a $search that is
     * longer than $str). With an empty $search the replacement is appended to $str.</p>
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_ending(
        string $str,
        string $search,
        string $replacement
    ): string {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        if ($search === '') {
            return $str . $replacement;
        }

        $search_bytes = \strlen($search);

        // A needle longer than the haystack can never match. Checking this first avoids handing an
        // out-of-range negative offset to a string function (warning on PHP 7, ValueError on PHP 8).
        if ($search_bytes > \strlen($str)) {
            return $str;
        }

        // Byte-wise comparison of the tail of $str with $search.
        if (\substr($str, -$search_bytes) === $search) {
            return ((string) \substr($str, 0, -$search_bytes)) . $replacement;
        }

        return $str;
    }
6842
6843
    /**
     * Replace the first occurrence of the "$search"-term with the "$replace"-term.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string to search in.</p>
     *
     * @return string <p>The subject with the first match replaced, or the unchanged subject if there is no match.</p>
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(
        string $search,
        string $replace,
        string $subject
    ): string {
        $first_pos = self::strpos($subject, $search);

        // No match -> nothing to do.
        if ($first_pos === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $first_pos, (int) self::strlen($search));
    }
6875
6876
    /**
     * Replace the last occurrence of the "$search"-term with the "$replace"-term.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string to search in.</p>
     *
     * @return string <p>The subject with the last match replaced, or the unchanged subject if there is no match.</p>
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $last_pos = self::strrpos($subject, $search);

        // No match -> nothing to do.
        if ($last_pos === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $last_pos, (int) self::strlen($search));
    }
6907
6908
    /**
     * Shuffles all the characters in the string.
     *
     * PS: uses random algorithm which is weak for cryptography purposes
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the shuffled string
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        // Nothing to shuffle; also keeps range() below from producing [0, -1] for empty input.
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // Shuffle the character positions, then rebuild the string in that order.
            $indexes = \range(0, (int) \mb_strlen($str) - 1);
            /** @noinspection NonSecureShuffleUsageInspection */
            \shuffle($indexes);

            // init
            $shuffled_str = '';

            // Iterate by value: the indexes are only read, so no reference is needed.
            foreach ($indexes as $i) {
                $tmp_sub_str = \mb_substr($str, $i, 1);
                if ($tmp_sub_str !== false) {
                    $shuffled_str .= $tmp_sub_str;
                }
            }

            return $shuffled_str;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $indexes = \range(0, (int) self::strlen($str, $encoding) - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($indexes);

        // init
        $shuffled_str = '';

        foreach ($indexes as $i) {
            $tmp_sub_str = self::substr($str, $i, 1, $encoding);
            if ($tmp_sub_str !== false) {
                $shuffled_str .= $tmp_sub_str;
            }
        }

        return $shuffled_str;
    }
6954
6955
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. If $start or $end is negative, it is computed from
     * the end of the string.
     *
     * <p>An empty string is returned whenever the resolved $end does not lie behind the resolved $start.</p>
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $start    <p>Initial index from which to begin extraction.</p>
     * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
     *                      characters long, <b>FALSE</b> will be returned.
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        // The UTF-8 fast path uses mbstring directly, every other charset goes through the class helpers.
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($end === null) {
            // No end given: take everything from $start on (the string length is always a sufficient length).
            if ($is_utf8) {
                return \mb_substr($str, $start, (int) \mb_strlen($str));
            }

            return self::substr($str, $start, (int) self::strlen($str, $encoding), $encoding);
        }

        if ($end >= 0 && $end <= $start) {
            return '';
        }

        if ($start < 0 || $end < 0) {
            // Resolve negative indexes to absolute positions first. Computing the length from a raw
            // negative $start yields a slice that is too long, and a negative computed length would
            // be interpreted by the substr functions as "omit characters from the end".
            $str_length = $is_utf8
                ? (int) \mb_strlen($str)
                : (int) self::strlen($str, $encoding);

            if ($start < 0) {
                // Same clamping the substr functions apply to a negative start.
                $start = (int) \max(0, $str_length + $start);
            }

            if ($end < 0) {
                $end = $str_length + $end;
            }

            if ($end <= $start) {
                return '';
            }
        }

        $length = $end - $start;

        if ($is_utf8) {
            return \mb_substr($str, $start, $length);
        }

        return self::substr($str, $start, $length, $encoding);
    }
7004
7005
    /**
7006
     * Convert a string to e.g.: "snake_case"
7007
     *
7008
     * @param string $str
7009
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7010
     *
7011
     * @return string string in snake_case
7012
     */
7013 22
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // hyphens count as word separators, just like underscores
        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Put a "_" in front of every number / upper-case letter. The character
        // class must not contain "|": inside [...] it is a literal pipe, not an
        // alternation, and would make "|" in the input match as well.
        $str = (string) \preg_replace_callback(
            '/([\\p{N}\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $match_int = (int) $match;

                // a plain digit (0-9) gets separated on both sides
                if ((string) $match_int === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        $str = (string) \preg_replace(
            [
                '/\\s+/u',           // convert spaces to "_"
                '/^\\s+|\\s+$/u', // trim leading & trailing spaces
                '/_+/',                 // remove double "_"
            ],
            [
                '_',
                '',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
7069
7070
    /**
7071
     * Sort all characters according to code points.
7072
     *
7073
     * @param string $str    <p>A UTF-8 string.</p>
7074
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
7075
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
7076
     *
7077
     * @return string string of sorted characters
7078
     */
7079 2
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        // work on the code points of the input
        $code_points = self::codepoints($str);

        if ($unique) {
            // flipping twice collapses repeated code points
            $code_points = \array_flip(\array_flip($code_points));
        }

        // order by code point value, ascending unless $desc is requested
        if ($desc) {
            \arsort(/** @scrutinizer ignore-type */ $code_points);
        } else {
            \asort(/** @scrutinizer ignore-type */ $code_points);
        }

        return self::string($code_points);
    }
7095
7096
    /**
7097
     * Convert a string to an array of Unicode characters.
7098
     *
7099
     * @param int|int[]|string|string[] $str                     <p>The string to split into array.</p>
7100
     * @param int                       $length                  [optional] <p>Max character length of each array
7101
     *                                                           element.</p>
7102
     * @param bool                      $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
7103
     * @param bool                      $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
7104
     *                                                           "mb_substr"</p>
7105
     *
7106
     * @return array
7107
     *               <p>An array containing chunks of the input.</p>
7108
     */
7109 89
    public static function str_split(
        $str,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        if (\is_array($str) === true) {
            // split every element on its own and keep the original keys
            foreach ($str as $key => $value) {
                $str[$key] = self::str_split(
                    $value,
                    $length,
                    $clean_utf8,
                    $try_to_use_mb_functions
                );
            }

            return $str;
        }

        // init
        $str = (string) $str;

        if ($str === '') {
            return [];
        }

        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        if (
            $try_to_use_mb_functions === true
            &&
            self::$SUPPORT['mbstring'] === true
        ) {
            $i_max = \mb_strlen($str);
            if ($i_max <= 127) {
                // short input: one mb_substr() call per character
                $ret = [];
                for ($i = 0; $i < $i_max; ++$i) {
                    $ret[] = \mb_substr($str, $i, 1);
                }
            } else {
                // longer input: let PCRE collect all characters in one pass
                $return_array = [];
                \preg_match_all('/./us', $str, $return_array);
                $ret = $return_array[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $return_array = [];
            \preg_match_all('/./us', $str, $return_array);
            $ret = $return_array[0] ?? [];
        } else {

            // fallback: decode the UTF-8 byte sequences by hand

            $ret = [];
            $len = \strlen($str);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($str[$i] & "\x80") === "\x00") {
                    // 1 byte: 0xxxxxxx
                    $ret[] = $str[$i];
                } elseif (
                    isset($str[$i + 1])
                    &&
                    ($str[$i] & "\xE0") === "\xC0"
                ) {
                    // 2 bytes: 110xxxxx 10xxxxxx
                    if (($str[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $str[$i] . $str[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($str[$i + 2])
                    &&
                    ($str[$i] & "\xF0") === "\xE0"
                ) {
                    // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($str[$i + 3])
                    &&
                    ($str[$i] & "\xF8") === "\xF0"
                ) {
                    // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            // group the single characters into chunks of $length and glue each chunk
            $ret = \array_chunk($ret, $length);

            return \array_map(
                // by-value on purpose: array_map() hands over values, not references
                static function (array $item): string {
                    return \implode('', $item);
                },
                $ret
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
7235
7236
    /**
7237
     * Splits the string with the provided regular expression, returning an
7238
     * array of strings. An optional integer $limit will truncate the
7239
     * results.
7240
     *
7241
     * @param string $str
7242
     * @param string $pattern <p>The regex with which to split the string.</p>
7243
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
7244
     *
7245
     * @return string[] an array of strings
7246
     */
7247 16
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $parts = \mb_split($pattern, $str);

            // mb_split() can fail; answer with an empty list like the
            // preg_split() fallback below instead of passing false along.
            if ($parts === false) {
                return [];
            }

            // a positive limit truncates the result to the first $limit pieces
            if ($limit > 0) {
                return \array_slice($parts, 0, $limit);
            }

            return $parts;
        }

        // Ask for one extra piece, so that the unsplit remainder ends up in its
        // own (last) element and can be dropped afterwards.
        if ($limit > 0) {
            ++$limit;
        } else {
            $limit = -1;
        }

        // NOTE(review): preg_quote() makes this fallback treat $pattern as a
        // literal string, while the mb_split() path treats it as a regex - confirm
        // which behavior is intended.
        $array = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);

        if ($array === false) {
            return [];
        }

        if ($limit > 0 && \count($array) === $limit) {
            \array_pop($array);
        }

        return $array;
    }
7297
7298
    /**
7299
     * Check if the string starts with the given substring.
7300
     *
7301
     * @param string $haystack <p>The string to search in.</p>
7302
     * @param string $needle   <p>The substring to search for.</p>
7303
     *
7304
     * @return bool
7305
     */
7306 19
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        // an empty needle is a prefix of every string
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // Byte-wise, binary-safe comparison of the prefix only: unlike strpos()
        // this never scans the rest of the haystack when the prefix differs.
        return \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
7318
7319
    /**
7320
     * Returns true if the string begins with any of $substrings, false otherwise.
7321
     *
7322
     * - case-sensitive
7323
     *
7324
     * @param string $str        <p>The input string.</p>
7325
     * @param array  $substrings <p>Substrings to look for.</p>
7326
     *
7327
     * @return bool whether or not $str starts with $substring
7328
     */
7329 8
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        // an empty input or an empty candidate list can never match
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_starts_with($str, $candidate)) {
                // the first matching prefix settles it
                return true;
            }
        }

        return false;
    }
7347
7348
    /**
7349
     * Gets the substring after the first occurrence of a separator.
7350
     *
7351
     * @param string $str       <p>The input string.</p>
7352
     * @param string $separator <p>The string separator.</p>
7353
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
7354
     *
7355
     * @return string
7356
     */
7357 1
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                // separator not found
                return '';
            }

            // skip the separator itself and return everything behind it
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // Use the class' own substr() wrapper here, exactly like the sibling
        // "after last" / "before first" / "before last" methods do, instead of
        // handing the raw encoding name to mb_substr().
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
7387
7388
    /**
7389
     * Gets the substring after the last occurrence of a separator.
7390
     *
7391
     * @param string $str       <p>The input string.</p>
7392
     * @param string $separator <p>The string separator.</p>
7393
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
7394
     *
7395
     * @return string
7396
     */
7397 1
    public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        // nothing to search in, or nothing to search for
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            // non-default encoding: go through the class' own wrappers
            $separator_pos = self::strrpos($str, $separator, 0, $encoding);
            if ($separator_pos === false) {
                return '';
            }

            $tail_start = $separator_pos + (int) self::strlen($separator, $encoding);

            return (string) self::substr($str, $tail_start, null, $encoding);
        }

        $separator_pos = \mb_strrpos($str, $separator);
        if ($separator_pos === false) {
            return '';
        }

        // everything behind the last separator
        $tail_start = $separator_pos + (int) \mb_strlen($separator);

        return (string) \mb_substr($str, $tail_start);
    }
7427
7428
    /**
7429
     * Gets the substring before the first occurrence of a separator.
7430
     *
7431
     * @param string $str       <p>The input string.</p>
7432
     * @param string $separator <p>The string separator.</p>
7433
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
7434
     *
7435
     * @return string
7436
     */
7437 1
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to search in, or nothing to search for
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            // non-default encoding: go through the class' own wrappers
            $separator_pos = self::strpos($str, $separator, 0, $encoding);

            return $separator_pos === false
                ? ''
                : (string) self::substr($str, 0, $separator_pos, $encoding);
        }

        $separator_pos = \mb_strpos($str, $separator);

        // the head of the string, up to (excluding) the first separator
        return $separator_pos === false
            ? ''
            : (string) \mb_substr($str, 0, $separator_pos);
    }
7471
7472
    /**
7473
     * Gets the substring before the last occurrence of a separator.
7474
     *
7475
     * @param string $str       <p>The input string.</p>
7476
     * @param string $separator <p>The string separator.</p>
7477
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
7478
     *
7479
     * @return string
7480
     */
7481 1
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        // nothing to search in, or nothing to search for
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            // the lookup still receives the encoding name as given by the caller
            $separator_pos = self::strrpos($str, $separator, 0, $encoding);
            if ($separator_pos === false) {
                return '';
            }

            // only the final extraction works with the normalized name
            $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, 0, $separator_pos, $normalized_encoding);
        }

        $separator_pos = \mb_strrpos($str, $separator);
        if ($separator_pos === false) {
            return '';
        }

        // the head of the string, up to (excluding) the last separator
        return (string) \mb_substr($str, 0, $separator_pos);
    }
7514
7515
    /**
7516
     * Gets the part of the string starting at (and including) the first occurrence of the "$needle", or the part before it via "$before_needle".
7517
     *
7518
     * @param string $str           <p>The input string.</p>
7519
     * @param string $needle        <p>The string to look for.</p>
7520
     * @param bool   $before_needle [optional] <p>Default: false</p>
7521
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
7522
     *
7523
     * @return string
7524
     */
7525 2
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to search in, or nothing to search for
        if ($needle === '' || $str === '') {
            return '';
        }

        // default encoding goes straight to mbstring, anything else to the wrapper
        $found = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $before_needle)
            : self::strstr($str, $needle, $before_needle, $encoding);

        // "not found" is reported as an empty string
        if ($found === false) {
            return '';
        }

        return $found;
    }
7559
7560
    /**
7561
     * Gets the part of the string starting at (and including) the last occurrence of the "$needle", or the part before it via "$before_needle".
7562
     *
7563
     * @param string $str           <p>The input string.</p>
7564
     * @param string $needle        <p>The string to look for.</p>
7565
     * @param bool   $before_needle [optional] <p>Default: false</p>
7566
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
7567
     *
7568
     * @return string
7569
     */
7570 2
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to search in, or nothing to search for
        if ($needle === '' || $str === '') {
            return '';
        }

        // default encoding goes straight to mbstring, anything else to the wrapper
        $found = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $before_needle)
            : self::strrchr($str, $needle, $before_needle, $encoding);

        // "not found" is reported as an empty string
        if ($found === false) {
            return '';
        }

        return $found;
    }
7604
7605
    /**
7606
     * Surrounds $str with the given substring.
7607
     *
7608
     * @param string $str
7609
     * @param string $substring <p>The substring to add to both sides.</p>
7610
     *
7611
     * @return string string with the substring both prepended and appended
7612
     */
7613 5
    public static function str_surround(string $str, string $substring): string
    {
        // the same substring goes in front of and behind the input
        return "{$substring}{$str}{$substring}";
    }
7617
7618
    /**
7619
     * Returns a trimmed string with the first letter of each word capitalized.
7620
     * Also accepts an array, $ignore, allowing you to list words not to be
7621
     * capitalized.
7622
     *
7623
     * @param string              $str
7624
     * @param array|string[]|null $ignore                        [optional] <p>An array of words not to capitalize or null.
7625
     *                                                           Default: null</p>
7626
     * @param string              $encoding                      [optional] <p>Default: 'UTF-8'</p>
7627
     * @param bool                $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
7628
     * @param string|null         $lang                          [optional] <p>Set the language for special cases: az, el, lt,
7629
     *                                                           tr</p>
7630
     * @param bool                $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
7631
     *                                                           ß</p>
7632
     * @param bool                $use_trim_first                [optional] <p>true === trim the input string, first</p>
7633
     * @param string|null         $word_define_chars             [optional] <p>A string of chars that will be used as whitespace separator === words.</p>
7634
     *
7635
     * @return string
7636
     *                <p>The titleized string.</p>
7637
     */
7638 10
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false,
        bool $use_trim_first = true,
        string $word_define_chars = null
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($use_trim_first === true) {
            $str = \trim($str);
        }

        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        // The plain mb_* functions are only used when no language specific
        // handling and no length preserving conversion was requested.
        $use_mb_functions = $lang === null && $try_to_keep_the_string_length === false;

        // Compare explicitly against null / '' instead of relying on truthiness:
        // the string "0" is falsy in PHP, but it is a valid separator character.
        if ($word_define_chars !== null && $word_define_chars !== '') {
            $word_define_chars = \preg_quote($word_define_chars, '/');
        } else {
            $word_define_chars = '';
        }

        // a "word" is every run of chars that is neither whitespace nor a separator
        $str = (string) \preg_replace_callback(
            '/([^\\s' . $word_define_chars . ']+)/u',
            static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
                // words from the ignore list are returned untouched
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                    return $match[0];
                }

                if ($use_mb_functions === true) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                               . \mb_strtolower(\mb_substr($match[0], 1));
                    }

                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
                }

                return self::ucfirst(
                    self::strtolower(
                        $match[0],
                        $encoding,
                        false,
                        $lang,
                        $try_to_keep_the_string_length
                    ),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            },
            $str
        );

        return $str;
    }
7708
7709
    /**
7710
     * Returns a trimmed string in proper title case.
7711
     *
7712
     * Also accepts an array, $ignore, allowing you to list words not to be
7713
     * capitalized.
7714
     *
7715
     * Adapted from John Gruber's script.
7716
     *
7717
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
7718
     *
7719
     * @param string $str
7720
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
7721
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7722
     *
7723
     * @return string the titleized string
7724
     */
7725 35
    public static function str_titleize_for_humans(
7726
        string $str,
7727
        array $ignore = [],
7728
        string $encoding = 'UTF-8'
7729
    ): string {
7730 35
        $small_words = \array_merge(
7731
            [
7732 35
                '(?<!q&)a',
7733
                'an',
7734
                'and',
7735
                'as',
7736
                'at(?!&t)',
7737
                'but',
7738
                'by',
7739
                'en',
7740
                'for',
7741
                'if',
7742
                'in',
7743
                'of',
7744
                'on',
7745
                'or',
7746
                'the',
7747
                'to',
7748
                'v[.]?',
7749
                'via',
7750
                'vs[.]?',
7751
            ],
7752 35
            $ignore
7753
        );
7754
7755 35
        $small_words_rx = \implode('|', $small_words);
7756 35
        $apostrophe_rx = '(?x: [\'’] [[:lower:]]* )?';
7757
7758 35
        $str = \trim($str);
7759
7760 35
        if (self::has_lowercase($str) === false) {
7761 2
            $str = self::strtolower($str, $encoding);
7762
        }
7763
7764
        // the main substitutions
7765 35
        $str = (string) \preg_replace_callback(
7766
            '~\\b (_*) (?:                                                         # 1. Leading underscore and
7767
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
7768 35
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_rx . ' ) #    URL, domain, or email
7769
                        |
7770 35
                        ( (?i: ' . $small_words_rx . ' ) ' . $apostrophe_rx . ' )            # 3. or small word (case-insensitive)
7771
                        |
7772 35
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 4. or word w/o internal caps
7773
                        |
7774 35
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 5. or some other word
7775
                      ) (_*) \\b                                                          # 6. With trailing underscore
7776
                    ~ux',
7777
            /**
7778
             * @param string[] $matches
7779
             *
7780
             * @return string
7781
             */
7782
            static function (array $matches) use ($encoding): string {
7783
                // preserve leading underscore
7784 35
                $str = $matches[1];
7785 35
                if ($matches[2]) {
7786
                    // preserve URLs, domains, emails and file paths
7787 5
                    $str .= $matches[2];
7788 35
                } elseif ($matches[3]) {
7789
                    // lower-case small words
7790 25
                    $str .= self::strtolower($matches[3], $encoding);
7791 35
                } elseif ($matches[4]) {
7792
                    // capitalize word w/o internal caps
7793 34
                    $str .= static::ucfirst($matches[4], $encoding);
7794
                } else {
7795
                    // preserve other kinds of word (iPhone)
7796 7
                    $str .= $matches[5];
7797
                }
7798
                // preserve trailing underscore
7799 35
                $str .= $matches[6];
7800
7801 35
                return $str;
7802 35
            },
7803 35
            $str
7804
        );
7805
7806
        // Exceptions for small words: capitalize at start of title...
7807 35
        $str = (string) \preg_replace_callback(
7808
            '~(  \\A [[:punct:]]*            # start of title...
7809
                      |  [:.;?!][ ]+                # or of subsentence...
7810
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
7811 35
                      ( ' . $small_words_rx . ' ) \\b # ...followed by small word
7812
                     ~uxi',
7813
            /**
7814
             * @param string[] $matches
7815
             *
7816
             * @return string
7817
             */
7818
            static function (array $matches) use ($encoding): string {
7819 11
                return $matches[1] . static::ucfirst($matches[2], $encoding);
7820 35
            },
7821 35
            $str
7822
        );
7823
7824
        // ...and end of title
7825 35
        $str = (string) \preg_replace_callback(
7826 35
            '~\\b ( ' . $small_words_rx . ' ) # small word...
7827
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
7828
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
7829
                     ~uxi',
7830
            /**
7831
             * @param string[] $matches
7832
             *
7833
             * @return string
7834
             */
7835
            static function (array $matches) use ($encoding): string {
7836 3
                return static::ucfirst($matches[1], $encoding);
7837 35
            },
7838 35
            $str
7839
        );
7840
7841
        // Exceptions for small words in hyphenated compound words.
7842
        // e.g. "in-flight" -> In-Flight
7843 35
        $str = (string) \preg_replace_callback(
7844
            '~\\b
7845
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
7846 35
                        ( ' . $small_words_rx . ' )
7847
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
7848
                       ~uxi',
7849
            /**
7850
             * @param string[] $matches
7851
             *
7852
             * @return string
7853
             */
7854
            static function (array $matches) use ($encoding): string {
7855
                return static::ucfirst($matches[1], $encoding);
7856 35
            },
7857 35
            $str
7858
        );
7859
7860
        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
7861 35
        $str = (string) \preg_replace_callback(
7862
            '~\\b
7863
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
7864
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
7865 35
                      ( ' . $small_words_rx . ' ) # ...followed by small word
7866
                      (?!	- )                 # Negative lookahead for another -
7867
                     ~uxi',
7868
            /**
7869
             * @param string[] $matches
7870
             *
7871
             * @return string
7872
             */
7873
            static function (array $matches) use ($encoding): string {
7874
                return $matches[1] . static::ucfirst($matches[2], $encoding);
7875 35
            },
7876 35
            $str
7877
        );
7878
7879 35
        return $str;
7880
    }
7881
7882
    /**
7883
     * Get a binary representation of a specific string.
7884
     *
7885
     * @param string $str <p>The input string.</p>
7886
     *
7887
     * @return false|string
7888
     *                      <p>false on error</p>
7889
     */
7890 2
    public static function str_to_binary(string $str)
    {
        // "H*" unpacks the whole input into one lowercase hex string (high nibble first).
        $value = \unpack('H*', $str);
        if ($value === false) {
            return false;
        }

        // Expand every hex digit into its four bits. This replaces base_convert(),
        // which converts through a float and therefore silently loses precision
        // as soon as the input no longer fits into a native integer.
        /** @noinspection OffsetOperationsInspection */
        $bits = \strtr(
            $value[1],
            [
                '0' => '0000',
                '1' => '0001',
                '2' => '0010',
                '3' => '0011',
                '4' => '0100',
                '5' => '0101',
                '6' => '0110',
                '7' => '0111',
                '8' => '1000',
                '9' => '1001',
                'a' => '1010',
                'b' => '1011',
                'c' => '1100',
                'd' => '1101',
                'e' => '1110',
                'f' => '1111',
            ]
        );

        // Keep the output format base_convert() produced for small inputs:
        // no leading zeros, and "0" when nothing but zeros (or nothing at all) is left.
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
7900
7901
    /**
7902
     * @param string   $str
7903
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
7904
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
7905
     *
7906
     * @return string[]
7907
     */
7908 17
    public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
    {
        // result used when there is nothing to split or when splitting fails
        $fallback = $remove_empty_values === true ? [] : [''];

        if ($str === '') {
            return $fallback;
        }

        // one or two consecutive CR / LF characters are treated as a single separator
        /** @noinspection PhpComposerExtensionStubsInspection */
        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split("[\r\n]{1,2}", $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        if ($lines === false) {
            return $fallback;
        }

        // no filtering requested: hand back the raw split result
        $needs_filtering = $remove_empty_values !== false || $remove_short_values !== null;
        if (!$needs_filtering) {
            return $lines;
        }

        return self::reduce_string_array($lines, $remove_empty_values, $remove_short_values);
    }
7939
7940
    /**
7941
     * Convert a string into an array of words.
7942
     *
7943
     * @param string   $str
7944
     * @param string   $char_list           <p>Additional chars for the definition of "words".</p>
7945
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
7946
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
7947
     *
7948
     * @return string[]
7949
     */
7950 13
    public static function str_to_words(
        string $str,
        string $char_list = '',
        bool $remove_empty_values = false,
        int $remove_short_values = null
    ): array {
        // result used when there is nothing to split or when splitting fails
        $fallback = $remove_empty_values === true ? [] : [''];

        if ($str === '') {
            return $fallback;
        }

        // character class describing what belongs to a word (letters plus $char_list)
        $word_chars = self::rxClass($char_list, '\pL');

        // The delimiters (= the words) are captured, so the result alternates
        // between "text between words" and "word".
        $parts = \preg_split("/({$word_chars}+(?:[\p{Pd}’']{$word_chars}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $fallback;
        }

        if ($remove_short_values === null && $remove_empty_values === false) {
            return $parts;
        }

        // make sure every remaining entry is a string
        return \array_map(
            static function ($item): string {
                return (string) $item;
            },
            self::reduce_string_array($parts, $remove_empty_values, $remove_short_values)
        );
    }
7987
7988
    /**
7989
     * alias for "UTF8::to_ascii()"
7990
     *
7991
     * @param string $str
7992
     * @param string $unknown
7993
     * @param bool   $strict
7994
     *
7995
     * @return string
7996
     *
7997
     * @see UTF8::to_ascii()
7998
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
7999
     */
8000 7
    public static function str_transliterate(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        // deprecated alias: all arguments are forwarded unchanged to to_ascii()
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
8007
8008
    /**
8009
     * Truncates the string to a given length. If $substring is provided, and
8010
     * truncating occurs, the string is further truncated so that the substring
8011
     * may be appended without exceeding the desired length.
8012
     *
8013
     * @param string $str
8014
     * @param int    $length    <p>Desired length of the truncated string.</p>
8015
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
8016
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
8017
     *
8018
     * @return string string after truncating
8019
     */
8020 22
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // fast path: plain mb_* calls, no encoding normalization needed
        if ($encoding === 'UTF-8') {
            // already short enough: nothing is cut, so nothing is appended
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                // reserve room for the appended substring
                $length -= (int) \mb_strlen($substring);

                // No room left for any part of $str. Without this guard a negative
                // length would make mb_substr() count from the end of the string and
                // the result would be longer than requested. Same outcome as in
                // str_truncate_safe(): only the substring is returned.
                if ($length <= 0) {
                    return $substring;
                }
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length) . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            $length -= (int) self::strlen($substring, $encoding);

            // see the UTF-8 branch: never pass a negative length on to substr()
            if ($length <= 0) {
                return $substring;
            }
        }

        return (string) self::substr($str, 0, $length, $encoding) . $substring;
    }
8065
8066
    /**
8067
     * Truncates the string to a given length, while ensuring that it does not
8068
     * split words. If $substring is provided, and truncating occurs, the
8069
     * string is further truncated so that the substring may be appended without
8070
     * exceeding the desired length.
8071
     *
8072
     * @param string $str
8073
     * @param int    $length                                 <p>Desired length of the truncated string.</p>
8074
     * @param string $substring                              [optional] <p>The substring to append if it can fit. Default:
8075
     *                                                       ''</p>
8076
     * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
8077
     * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>Default: false</p>
8078
     *
8079
     * @return string string after truncating
8080
     */
8081 47
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignore_do_not_split_words_for_one_word = false
    ): string {
        if ($length <= 0 || $str === '') {
            return $substring;
        }

        if ($encoding === 'UTF-8') {
            // short enough already: return the input untouched
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // reserve room for the substring that gets appended
            $length -= (int) \mb_strlen($substring);
            if ($length <= 0) {
                return $substring;
            }

            $head = \mb_substr($str, 0, $length);
            if ($head === false) {
                return '';
            }

            // a space exactly at the cut position means no word was split
            $next_space = \mb_strpos($str, ' ', $length - 1);
            if ($next_space === $length) {
                return $head . $substring;
            }

            // otherwise fall back to the last space inside the truncated part
            $last_space = \mb_strrpos($head, ' ', 0);
            $cut_at_last_space = $last_space !== false
                                 ||
                                 ($next_space !== false && $ignore_do_not_split_words_for_one_word === false);
            if ($cut_at_last_space) {
                // (int) false === 0: without any space inside $head the whole word is dropped
                $head = (string) \mb_substr($head, 0, (int) $last_space);
            }

            return $head . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // short enough already: return the input untouched
        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // reserve room for the substring that gets appended
        $length -= (int) self::strlen($substring, $encoding);
        if ($length <= 0) {
            return $substring;
        }

        $head = self::substr($str, 0, $length, $encoding);
        if ($head === false) {
            return '';
        }

        // a space exactly at the cut position means no word was split
        $next_space = self::strpos($str, ' ', $length - 1, $encoding);
        if ($next_space === $length) {
            return $head . $substring;
        }

        // otherwise fall back to the last space inside the truncated part
        $last_space = self::strrpos($head, ' ', 0, $encoding);
        $cut_at_last_space = $last_space !== false
                             ||
                             ($next_space !== false && $ignore_do_not_split_words_for_one_word === false);
        if ($cut_at_last_space) {
            // (int) false === 0: without any space inside $head the whole word is dropped
            $head = (string) self::substr($head, 0, (int) $last_space, $encoding);
        }

        return $head . $substring;
    }
8160
8161
    /**
8162
     * Returns a lowercase and trimmed string separated by underscores.
8163
     * Underscores are inserted before uppercase characters (with the exception
8164
     * of the first character of the string), and in place of spaces as well as
8165
     * dashes.
8166
     *
8167
     * @param string $str
8168
     *
8169
     * @return string the underscored string
8170
     */
8171 16
    public static function str_underscored(string $str): string
    {
        // delegate to str_delimit() with the underscore as delimiter
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
8175
8176
    /**
8177
     * Returns an UpperCamelCase version of the supplied string. It trims
8178
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
8179
     * and underscores, and removes spaces, dashes, underscores.
8180
     *
8181
     * @param string      $str                           <p>The input string.</p>
8182
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
8183
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
8184
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
8185
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
8186
     *
8187
     * @return string string in UpperCamelCase
8188
     */
8189 13
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // step 1: camelize (only $str and $encoding are passed on here)
        $camelized = self::str_camelize($str, $encoding);

        // step 2: upper-case the first character, honoring the remaining options
        return self::ucfirst(
            $camelized,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }
8198
8199
    /**
8200
     * alias for "UTF8::ucfirst()"
8201
     *
8202
     * @param string      $str
8203
     * @param string      $encoding
8204
     * @param bool        $clean_utf8
8205
     * @param string|null $lang
8206
     * @param bool        $try_to_keep_the_string_length
8207
     *
8208
     * @return string
8209
     *
8210
     * @see UTF8::ucfirst()
8211
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
8212
     */
8213 5
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // deprecated alias: every argument is forwarded unchanged to ucfirst()
        $result = self::ucfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
8228
8229
    /**
8230
     * Counts number of words in the UTF-8 string.
8231
     *
8232
     * @param string $str       <p>The input string.</p>
8233
     * @param int    $format    [optional] <p>
8234
     *                          <strong>0</strong> => return a number of words (default)<br>
8235
     *                          <strong>1</strong> => return an array of words<br>
8236
     *                          <strong>2</strong> => return an array of words with word-offset as key
8237
     *                          </p>
8238
     * @param string $char_list [optional] <p>Additional chars that belong to words and do not start a new word.</p>
8239
     *
8240
     * @return int|string[] The number of words in the string
8241
     */
8242 2
    public static function str_word_count(string $str, int $format = 0, string $char_list = '')
    {
        // str_to_words() returns an alternating list: even indexes hold the text
        // between words, odd indexes hold the words themselves.
        $parts = self::str_to_words($str, $char_list);
        $total = \count($parts);

        // default (and any unknown format): only the number of words
        if ($format !== 1 && $format !== 2) {
            return (int) (($total - 1) / 2);
        }

        $words = [];

        // format 1: plain list of words
        if ($format === 1) {
            for ($idx = 1; $idx < $total; $idx += 2) {
                $words[] = $parts[$idx];
            }

            return $words;
        }

        // format 2: words keyed by their offset within $str
        $position = (int) self::strlen($parts[0]);
        for ($idx = 1; $idx < $total; $idx += 2) {
            $words[$position] = $parts[$idx];
            // skip the word itself and the non-word text following it
            $position += (int) self::strlen($parts[$idx]) + (int) self::strlen($parts[$idx + 1]);
        }

        return $words;
    }
8266
8267
    /**
8268
     * Case-insensitive string comparison.
8269
     *
8270
     * INFO: Case-insensitive version of UTF8::strcmp()
8271
     *
8272
     * @param string $str1     <p>The first string.</p>
8273
     * @param string $str2     <p>The second string.</p>
8274
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
8275
     *
8276
     * @return int
8277
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
8278
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
8279
     *             <strong>0</strong> if they are equal
8280
     */
8281 23
    public static function strcasecmp(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): int {
        // case-fold both operands with identical options ...
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        // ... and let the case-sensitive comparison decide
        return self::strcmp($folded1, $folded2);
    }
8305
8306
    /**
8307
     * alias for "UTF8::strstr()"
8308
     *
8309
     * @param string $haystack
8310
     * @param string $needle
8311
     * @param bool   $before_needle
8312
     * @param string $encoding
8313
     * @param bool   $clean_utf8
8314
     *
8315
     * @return false|string
8316
     *
8317
     * @see UTF8::strstr()
8318
     */
8319 2
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // alias: every argument is forwarded unchanged to strstr()
        $result = self::strstr($haystack, $needle, $before_needle, $encoding, $clean_utf8);

        return $result;
    }
8334
8335
    /**
8336
     * Case-sensitive string comparison.
8337
     *
8338
     * @param string $str1 <p>The first string.</p>
8339
     * @param string $str2 <p>The second string.</p>
8340
     *
8341
     * @return int
8342
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
8343
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
8344
     *             <strong>0</strong> if they are equal
8345
     */
8346 29
    public static function strcmp(string $str1, string $str2): int
    {
        // identical bytes: no normalization needed
        if ($str1 === $str2) {
            return 0;
        }

        // Compare the canonically decomposed (NFD) forms, so that e.g. a precomposed
        // character and its "base + combining mark" sequence are treated as equal.
        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // Normalizer::normalize() returns false for input it cannot handle (e.g. invalid
        // UTF-8). Fall back to the raw bytes in that case; otherwise two different
        // invalid strings would compare as equal (or raise a TypeError under strict types).
        return \strcmp(
            \is_string($normalized1) ? $normalized1 : $str1,
            \is_string($normalized2) ? $normalized2 : $str2
        );
    }
8357
8358
    /**
8359
     * Find length of initial segment not matching mask.
8360
     *
8361
     * @param string $str
8362
     * @param string $char_list
8363
     * @param int    $offset
8364
     * @param int    $length
8365
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
8366
     *
8367
     * @return int
8368
     */
8369 12
    public static function strcspn(
        string $str,
        string $char_list,
        int $offset = null,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // empty mask: nothing can match, so the whole string is the segment
        if ($char_list === '') {
            return (int) self::strlen($str, $encoding);
        }

        // restrict the search to the requested slice first
        if ($offset !== null || $length !== null) {
            if ($encoding !== 'UTF-8') {
                /** @noinspection UnnecessaryCastingInspection */
                $slice = self::substr($str, (int) $offset, $length, $encoding);
            } elseif ($length === null) {
                /** @noinspection UnnecessaryCastingInspection */
                $slice = \mb_substr($str, (int) $offset);
            } else {
                /** @noinspection UnnecessaryCastingInspection */
                $slice = \mb_substr($str, (int) $offset, $length);
            }

            if ($slice === false) {
                return 0;
            }

            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $str = $slice;
        }

        if ($str === '') {
            return 0;
        }

        // lazily capture everything in front of the first character from the mask
        $found = [];
        $pattern = '/^(.*?)' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $str, $found)) {
            // no character of the mask occurs at all
            return (int) self::strlen($str, $encoding);
        }

        $prefix_length = self::strlen($found[1], $encoding);

        return $prefix_length === false ? 0 : $prefix_length;
    }
8422
8423
    /**
8424
     * alias for "UTF8::stristr()"
8425
     *
8426
     * @param string $haystack
8427
     * @param string $needle
8428
     * @param bool   $before_needle
8429
     * @param string $encoding
8430
     * @param bool   $clean_utf8
8431
     *
8432
     * @return false|string
8433
     *
8434
     * @see UTF8::stristr()
8435
     */
8436 1
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // alias: every argument is forwarded unchanged to stristr()
        $result = self::stristr($haystack, $needle, $before_needle, $encoding, $clean_utf8);

        return $result;
    }
8451
8452
    /**
8453
     * Create a UTF-8 string from code points.
8454
     *
8455
     * INFO: opposite to UTF8::codepoints()
8456
     *
8457
     * @param array $array <p>Integer or Hexadecimal codepoints.</p>
8458
     *
8459
     * @return string UTF-8 encoded string
8460
     */
8461 4
    public static function string(array $array): string
    {
        // turn every code point into its character via self::chr() ...
        $to_char = [self::class, 'chr'];
        $chars = \array_map($to_char, $array);

        // ... and glue the characters together
        return \implode('', $chars);
    }
8474
8475
    /**
8476
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
8477
     *
8478
     * @param string $str <p>The input string.</p>
8479
     *
8480
     * @return bool
8481
     *              <strong>true</strong> if the string has BOM at the start,<br>
8482
     *              <strong>false</strong> otherwise
8483
     */
8484 6
    public static function string_has_bom(string $str): bool
    {
        // Only the keys of self::$BOM (the BOM byte sequences) are needed; the values
        // are not used here. Iterating the keys by value avoids binding a reference
        // into the shared static array.
        foreach (\array_keys(self::$BOM) as $bom_string) {
            $bom_string = (string) $bom_string;

            // prefix comparison: only the leading bytes are inspected, instead of
            // searching the whole string as strpos() would do when there is no match
            if (\strncmp($str, $bom_string, \strlen($bom_string)) === 0) {
                return true;
            }
        }

        return false;
    }
8495
8496
    /**
8497
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
8498
     *
8499
     * @see http://php.net/manual/en/function.strip-tags.php
8500
     *
8501
     * @param string $str            <p>
8502
     *                               The input string.
8503
     *                               </p>
8504
     * @param string $allowable_tags [optional] <p>
8505
     *                               You can use the optional second parameter to specify tags which should
8506
     *                               not be stripped.
8507
     *                               </p>
8508
     *                               <p>
8509
     *                               HTML comments and PHP tags are also stripped. This is hardcoded and
8510
     *                               can not be changed with allowable_tags.
8511
     *                               </p>
8512
     * @param bool   $clean_utf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
8513
     *
8514
     * @return string
8515
     *                <p>The stripped string.</p>
8516
     */
8517 4
    public static function strip_tags(
        string $str,
        string $allowable_tags = null,
        bool $clean_utf8 = false
    ): string {
        if ($str === '') {
            return '';
        }

        // optional cleanup via self::clean() before the tags are stripped
        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        // only pass the second argument to the native function when one was given
        return $allowable_tags === null
            ? \strip_tags($str)
            : \strip_tags($str, $allowable_tags);
    }
8536
8537
    /**
8538
     * Strip all whitespace characters. This includes tabs and newline
8539
     * characters, as well as multibyte whitespace such as the thin space
8540
     * and ideographic space.
8541
     *
8542
     * @param string $str
8543
     *
8544
     * @return string
8545
     */
8546 36
    public static function strip_whitespace(string $str): string
    {
        // nothing to do for an empty string; otherwise drop every run of
        // whitespace matched by the POSIX "space" class (in UTF-8 mode)
        return $str === ''
            ? ''
            : (string) \preg_replace('/[[:space:]]+/u', '', $str);
    }
8554
8555
    /**
8556
     * Finds position of first occurrence of a string within another, case insensitive.
8557
     *
8558
     * @see http://php.net/manual/en/function.mb-stripos.php
8559
     *
8560
     * @param string $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
8561
     * @param string $needle     <p>The string to find in haystack.</p>
8562
     * @param int    $offset     [optional] <p>The position in haystack to start searching.</p>
8563
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
8564
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
8565
     *
8566
     * @return false|int
8567
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
8568
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
8569
     */
8570 24
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        // preferred implementation: mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stripos($haystack, $needle, $offset);
            }

            return \mb_stripos(
                $haystack,
                $needle,
                $offset,
                self::normalize_encoding($encoding, 'UTF-8')
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // "grapheme_stripos()" can handle neither other encodings nor a negative offset
        $use_grapheme = $encoding === 'UTF-8'
                        &&
                        $offset >= 0
                        &&
                        self::$SUPPORT['intl'] === true;
        if ($use_grapheme) {
            $position = \grapheme_stripos($haystack, $needle, $offset);
            // a miss is not final here: the remaining fallbacks are still tried
            if ($position !== false) {
                return $position;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both sides, then search case-sensitively
        //

        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($folded_haystack, $folded_needle, $offset, $encoding);
    }
8630
8631
    /**
     * Returns all of haystack starting from and including the first case-insensitive occurrence of needle to the end.
     *
     * INFO: the first available backend is used, in this order: mbstring, intl (UTF-8 only),
     *       native "stristr()" (ASCII-only input), PCRE.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found
     *                      or if one of the inputs is an empty string.</p>
     */
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // The needle can only be empty at this point if the cleaning above removed everything.
        // INFO: compare against '' explicitly, a loose "!$needle" check also matches the valid needle "0"
        if ($needle === '') {
            return $haystack;
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // capture (non-greedy) everything in front of the first case-insensitive match of the needle
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // skip as many characters as were captured in front of the needle
        return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
    }
8715
8716
    /**
     * Count the characters of a string, not its bytes.
     *
     * INFO: the first available backend is used, in this order: mbstring, native "strlen()" (for "CP850" / "ASCII"),
     *       iconv, intl (UTF-8 only), native "strlen()" (ASCII-only input), PCRE.
     *
     * @see http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str        <p>The string being checked for length.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   <p>
     *                   The number <strong>(int)</strong> of characters in $str, interpreted in $encoding
     *                   (a multi-byte character counts as 1).
     *                   <br>
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
     *                   chars.
     *                   </p>
     */
    public static function strlen(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($str === '') {
            return 0;
        }

        // "UTF-8" and "CP850" are taken as they are, every other name is normalized first
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8' ? \mb_strlen($str) : \mb_strlen($str, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strlen($str);
        }

        // warn if neither mbstring nor iconv is there to deal with a non UTF-8 encoding
        $has_converter = self::$SUPPORT['mbstring'] !== false || self::$SUPPORT['iconv'] !== false;
        if ($encoding !== 'UTF-8' && !$has_converter) {
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconv_length = \iconv_strlen($str, $encoding);
            if ($iconv_length !== false) {
                return $iconv_length;
            }
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strlen()" can't handle other encodings
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $grapheme_length = \grapheme_strlen($str);
            if ($grapheme_length !== null) {
                return $grapheme_length;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php
        //

        // every match of "." in unicode mode is one character
        \preg_match_all('/./us', $str, $chars);
        $char_count = \count($chars[0]);

        // the string is not empty, so zero matches means that the input could not be processed
        return $char_count === 0 ? false : $char_count;
    }
8835
8836
    /**
     * Get the length of a string in bytes.
     *
     * @param string $str <p>The string being measured.</p>
     *
     * @return int
     *             <p>The number of bytes, 0 for an empty string.</p>
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ("CP850")
        // instead of the native function ...
        $is_overloaded = self::$SUPPORT['mbstring_func_overload'] === true;

        return $is_overloaded ? \mb_strlen($str, 'CP850') : \strlen($str);
    }
8856
8857
    /**
     * Compare two strings case-insensitively with a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcasecmp(); both strings are case-folded first
     *       and then handed over to UTF8::strnatcmp()
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded_str1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_str2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded_str1, $folded_str2);
    }
8878
8879
    /**
     * Compare two strings with a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcmp(); both strings are passed through
     *       UTF8::strtonatfold() before the native "strnatcmp()" compares them
     *
     * @see http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        // identical strings need no folding at all
        if ($str1 === $str2) {
            return 0;
        }

        $folded_str1 = (string) self::strtonatfold($str1);
        $folded_str2 = (string) self::strtonatfold($str2);

        return \strnatcmp($folded_str1, $folded_str2);
    }
8905
8906
    /**
     * Case-insensitive string comparison of the first n characters.
     *
     * INFO: both strings are case-folded first and then handed over to UTF8::strncmp()
     *
     * @see http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        return self::strncmp(
            self::strtocasefold($str1, true, false, $encoding, null, false),
            self::strtocasefold($str2, true, false, $encoding, null, false),
            $len,
            // forward the encoding, so that the first $len characters are cut in the same
            // charset that was used for the case folding (and not always as "UTF-8")
            $encoding
        );
    }
8933
8934
    /**
     * Compare the first n characters of two strings.
     *
     * INFO: both strings are cut down to $len characters and then compared via UTF8::strcmp()
     *
     * @see http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // "UTF-8" and "CP850" are taken as they are, every other name is normalized first
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($encoding !== 'UTF-8') {
            $head_of_str1 = (string) self::substr($str1, 0, $len, $encoding);
            $head_of_str2 = (string) self::substr($str2, 0, $len, $encoding);

            return self::strcmp($head_of_str1, $head_of_str2);
        }

        // UTF-8: call "mb_substr()" directly
        $head_of_str1 = (string) \mb_substr($str1, 0, $len);
        $head_of_str2 = (string) \mb_substr($str2, 0, $len);

        return self::strcmp($head_of_str1, $head_of_str2);
    }
8969
8970
    /**
     * Search a string for the first occurrence of any character out of a list.
     *
     * @see http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>This parameter is case sensitive.</p>
     *
     * @return false|string
     *                      <p>The string starting from the character found,<br>or <strong>false</strong> if it is
     *                      not found or if one of the inputs is an empty string.</p>
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        // pattern built from the char-list via rxClass(), matched in unicode mode
        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $found)) {
            return false;
        }

        // cut the haystack at the byte position of the matched character
        $byte_offset = (int) \strpos($haystack, $found[0]);

        return \substr($haystack, $byte_offset);
    }
8992
8993
    /**
     * Find position of first occurrence of string in a string.
     *
     * INFO: the first available backend is used, in this order: mbstring, native "strpos()" (for "CP850" / "ASCII"),
     *       intl (UTF-8 only), iconv, native "strpos()" (ASCII-only input), vanilla php.
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>The search offset. If it is not specified, 0 is used.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string.<br> If needle is not found it returns false.
     */
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strpos($haystack, $needle, $offset);
            }

            return \mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //
        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $return_tmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Character position (in the original haystack) at which the searched part starts.
        // A negative offset counts from the end of the string, so it is translated into an absolute
        // position; otherwise the result would be relative to the cut-off tail instead of the haystack.
        // INFO: an offset which reaches before the start of the string is handled like position 0
        // NOTE(review): assumes self::substr() treats a negative start like "mb_substr()" does - confirm
        $start_position = $offset;
        if ($offset < 0) {
            $start_position = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystack_tmp === false) {
            $haystack_tmp = '';
        }
        $haystack = (string) $haystack_tmp;

        // byte position of the needle in the remaining part of the haystack
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        if ($pos) {
            // convert the byte position into a character position
            return $start_position + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        return $start_position;
    }
9140
9141
    /**
     * Find the byte position of the first occurrence of a string in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @return false|int
     *                   <p>The numeric position of the first occurrence of needle in the haystack string.<br>
     *                   If needle is not found or if one of the inputs is an empty string, it returns false.</p>
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ("CP850")
        // instead of the native function ...
        $is_overloaded = self::$SUPPORT['mbstring_func_overload'] === true;

        return $is_overloaded
            ? \mb_strpos($haystack, $needle, $offset, 'CP850')
            : \strpos($haystack, $needle, $offset);
    }
9170
9171
    /**
     * Get the part of a string around the last occurrence of a needle.
     *
     * INFO: without mbstring only the first character of $needle is searched for
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      <p>The portion of haystack,<br>or <strong>false</strong> if needle is not found
     *                      or if one of the inputs is an empty string.</p>
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // "UTF-8" and "CP850" are taken as they are, every other name is normalized first
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        // INFO: the native function has no "before needle" mode, so it is only used without it
        if ($before_needle === false && \in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strrchr($haystack, $needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv || vanilla php
        //

        // both fallbacks search for the first character of the needle only
        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }
        $needle = (string) $first_char;

        if (self::$SUPPORT['iconv'] === true) {
            $last_pos = \iconv_strrpos($haystack, $needle, $encoding);
        } else {
            $last_pos = self::strrpos($haystack, $needle, 0, $encoding);
        }

        if ($last_pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
9293
9294
    /**
     * Reverse the order of the characters in a string.
     *
     * INFO: the string is passed through emoji_encode() before and through emoji_decode() after the reversing
     *       (presumably to keep emoji intact - confirm against emoji_encode())
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The string with characters in the reverse sequence.</p>
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::emoji_encode($str, true);

        // collect the characters from the last one down to the first one
        $result = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            for ($index = (int) self::strlen($str, $encoding) - 1; $index >= 0; --$index) {
                $char = self::substr($str, $index, 1, $encoding);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($index = (int) \grapheme_strlen($str) - 1; $index >= 0; --$index) {
                $char = \grapheme_substr($str, $index, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } else {
            for ($index = (int) \mb_strlen($str) - 1; $index >= 0; --$index) {
                $char = \mb_substr($str, $index, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        }

        return self::emoji_decode($result, true);
    }
9346
9347
    /**
     * Get the part of a string around the last case-insensitive occurrence of a needle.
     *
     * INFO: without mbstring only the first character of $needle is searched for
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      <p>The portion of haystack,<br>or <strong>false</strong> if needle is not found
     *                      or if one of the inputs is an empty string.</p>
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        // "UTF-8" and "CP850" are taken as they are, every other name is normalized first
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // the fallback searches for the first character of the needle only
        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }
        $needle = (string) $first_char;

        $last_pos = self::strripos($haystack, $needle, 0, $encoding);
        if ($last_pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
9422
9423
    /**
     * Find the position of the last case-insensitive occurrence of a string in a string.
     *
     * INFO: the first available backend is used, in this order: mbstring, native "strripos()"
     *       (for "CP850" / "ASCII"), intl (UTF-8 only), native "strripos()" (ASCII-only input),
     *       case folding + UTF8::strrpos().
     *
     * @param string     $haystack   <p>The string to look in.</p>
     * @param int|string $needle     <p>The string to look for.<br>A non-negative int is converted via UTF8::chr().</p>
     * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        $is_code_point = (int) $needle === $needle && $needle >= 0;
        $needle = $is_code_point ? (string) self::chr($needle) : (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // "UTF-8" and "CP850" are taken as they are, every other name is normalized first
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strripos($haystack, $needle, $offset);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strripos()" can't handle other encodings and no negative offset
        if ($encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true) {
            $grapheme_pos = \grapheme_strripos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // case-fold both strings and run the case-sensitive search on the result
        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($folded_haystack, $folded_needle, $offset, $encoding, $clean_utf8);
    }
9533
9534
    /**
     * Finds the byte position of the last occurrence of a string within another, case insensitive.
     *
     * @param string $haystack <p>The string that is searched for the last occurrence of needle.</p>
     * @param string $needle   <p>The string to find in haystack.</p>
     * @param int    $offset   [optional] <p>The position in haystack to start searching.</p>
     *
     * @return false|int
     *                   <p>Return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found (or if one of the
     *                   strings is empty).</p>
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // nothing to search for / nothing to search in
        if ($needle === '' || $haystack === '') {
            return false;
        }

        $is_overloaded = self::$SUPPORT['mbstring_func_overload'] === true;

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return $is_overloaded
            ? \mb_strripos($haystack, $needle, $offset, 'CP850')
            : \strripos($haystack, $needle, $offset);
    }
9566
9567
    /**
     * Find the position of the last occurrence of a string in a string.
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack   <p>The string that is searched for the last occurrence of needle.</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>May be specified to begin searching an arbitrary number of
     *                               characters into the string. Negative values will stop searching at an
     *                               arbitrary point prior to the end of the string.</p>
     * @param string     $encoding   [optional] <p>Set the charset.</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the
     *                   haystack string.<br>If needle is not found, it returns false.</p>
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // an empty haystack cannot contain anything
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support an integer $needle, so a non-negative
        // integer is converted via self::chr() before the string cast
        $needle = ($needle === (int) $needle && $needle >= 0)
            ? (string) self::chr($needle)
            : (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // mb_strrpos && iconv_strrpos are not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // the two most common encoding names skip the normalization step
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrpos($haystack, $needle, $offset)
                : \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // binary || ascii only: the native byte function is sufficient
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strrpos($haystack, $needle, $offset);
        }

        // warn about encodings the remaining fallbacks are not written for
        if (
            self::$SUPPORT['mbstring'] === false
            &&
            $encoding !== 'UTF-8'
        ) {
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // "grapheme_strrpos()" is only used for UTF-8 and for a non-negative offset
        $intl_usable = self::$SUPPORT['intl'] === true
                       && $encoding === 'UTF-8'
                       && $offset >= 0;
        if ($intl_usable) {
            $grapheme_pos = \grapheme_strrpos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // apply the offset by cutting the haystack first: a positive offset removes
        // the leading characters (and is added back to the result), a negative
        // offset removes the trailing characters (nothing needs to be added back)
        if ($offset !== 0) {
            if ($offset > 0) {
                $slice = self::substr($haystack, $offset);
            } else {
                $slice = self::substr($haystack, 0, $offset);
                $offset = 0;
            }

            $haystack = $slice === false ? '' : (string) $slice;
        }

        // byte position of the last match ...
        $byte_pos = \strrpos($haystack, $needle);
        if ($byte_pos === false) {
            return false;
        }

        // ... translated into a character position by measuring everything before it
        $prefix = \substr($haystack, 0, $byte_pos);
        if ($prefix === false) {
            return false;
        }

        return $offset + (int) self::strlen($prefix);
    }
9704
9705
    /**
     * Find the byte position of the last occurrence of a string in a string.
     *
     * @param string $haystack <p>The string that is searched for the last occurrence of needle.</p>
     * @param string $needle   <p>The string to find in haystack.</p>
     * @param int    $offset   [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                         into the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.</p>
     *
     * @return false|int
     *                   <p>The numeric position of the last occurrence of needle in the
     *                   haystack string. If needle is not found (or if one of the strings
     *                   is empty), it returns false.</p>
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // nothing to search for / nothing to search in
        if ($needle === '' || $haystack === '') {
            return false;
        }

        $is_overloaded = self::$SUPPORT['mbstring_func_overload'] === true;

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return $is_overloaded
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
            : \strrpos($haystack, $needle, $offset);
    }
9737
9738
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained
     * within a given mask.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $mask     <p>The mask of chars</p>
     * @param int    $offset   [optional] <p>If non-zero, only the part of the string starting here is inspected.</p>
     * @param int    $length   [optional] <p>If given, only that many characters are inspected.</p>
     * @param string $encoding [optional] <p>Set the charset.</p>
     *
     * @return false|int
     *                   <p>The length of the matching prefix, 0 if nothing matches or if the
     *                   (sliced) string or the mask is empty.</p>
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        // the two most common encoding names skip the normalization step
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // restrict the input to the requested window before matching
        $needs_slice = $offset !== 0 || $length !== null;
        if ($needs_slice) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $str = (string) \mb_substr($str, $offset, $length);
            } else {
                $str = (string) \mb_substr($str, $offset);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        // match the longest prefix that is built only from mask characters
        $found = [];
        $pattern = '/^' . self::rxClass($mask) . '+/u';
        if (!\preg_match($pattern, $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
9781
9782
    /**
     * Returns the part of the haystack string from the first occurrence of needle to the end of haystack.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, the part of the haystack before the first
     *                              occurrence of the needle is returned (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      A sub-string,<br>or <strong>false</strong> if needle is not found
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        $clean_utf8 = false
    ) {
        // nothing to search for / nothing to search in
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // the two most common encoding names skip the normalization step
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // binary || ascii only: the native byte function is sufficient
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        // warn about encodings the remaining fallbacks are not written for
        if (
            self::$SUPPORT['mbstring'] === false
            &&
            $encoding !== 'UTF-8'
        ) {
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // "grapheme_strstr()" is only used for UTF-8
        if (self::$SUPPORT['intl'] === true && $encoding === 'UTF-8') {
            $grapheme_part = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($grapheme_part !== false) {
                return $grapheme_part;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // capture (non-greedy) everything in front of the first occurrence of the needle
        $pattern = '/^(.*?)' . \preg_quote($needle, '/') . '/us';
        \preg_match($pattern, $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        $head = $match[1];

        // either the captured head itself, or everything that follows the head
        return $before_needle
            ? $head
            : self::substr($haystack, (int) self::strlen($head));
    }
9890
9891
    /**
     * Finds the first occurrence of a string within another (byte based).
     *
     * @param string $haystack      <p>The string from which to get the first occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the first occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the first occurrence of needle to the end.
     *                              </p>
     *
     * @return false|string
     *                      <p>The portion of haystack, or false if needle is not found
     *                      (or if one of the strings is empty).</p>
     */
    public static function strstr_in_byte(
        string $haystack,
        string $needle,
        bool $before_needle = false
    ) {
        // nothing to search for / nothing to search in
        if ($needle === '' || $haystack === '') {
            return false;
        }

        $is_overloaded = self::$SUPPORT['mbstring_func_overload'] === true;

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return $is_overloaded
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
            : \strstr($haystack, $needle, $before_needle);
    }
9930
9931
    /**
     * Unicode transformation for case-less matching.
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str        <p>The input string.</p>
     * @param bool        $full       [optional] <p>
     *                                <b>true</b>, replace full case folding chars (default)<br>
     *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                                </p>
     * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding   [optional] <p>Set the charset.</p>
     * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower      [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                                is for some languages better ...</p>
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $clean_utf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            // drop invalid UTF-8 sequences before the case conversion
            $str = self::clean($str);
        }

        // special-case replacements first, the generic case conversion afterwards
        $str = self::fixStrCaseHelper($str, $lower, $full);

        $to_lower = $lower === true;

        // shortcut: no language and plain UTF-8 can be converted by "mb_" directly
        if ($encoding === 'UTF-8' && $lang === null) {
            return $to_lower ? \mb_strtolower($str) : \mb_strtoupper($str);
        }

        // otherwise go through the encoding- and language-aware wrappers of this class
        return $to_lower
            ? self::strtolower($str, $encoding, false, $lang)
            : self::strtoupper($str, $encoding, false, $lang);
    }
9983
9984
    /**
     * Make a string lowercase.
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                           <p>The string being lowercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // the input is untyped, so force a string first
        $str = (string) $str;
        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            // drop invalid UTF-8 sequences before the case conversion
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length === true) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // shortcut: no language and plain UTF-8 can be converted by "mb_" directly
        if ($encoding === 'UTF-8' && $lang === null) {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // no language requested: always fallback via symfony polyfill
        if ($lang === null) {
            return \mb_strtolower($str, $encoding);
        }

        // a language was requested, but that needs the transliterator from intl
        if (self::$SUPPORT['intl'] !== true) {
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            return \mb_strtolower($str, $encoding);
        }

        // load the list of known transliterators only once
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        // unknown languages are reported and replaced by the generic transliterator
        $transliterator_id = $lang . '-Lower';
        if (!\in_array($transliterator_id, self::$INTL_TRANSLITERATOR_LIST, true)) {
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

            $transliterator_id = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        return (string) \transliterator_transliterate($transliterator_id, $str);
    }
10053
10054
    /**
     * Make a string uppercase.
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                           <p>The string being uppercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length
     *                                                   (the string is passed through the internal case-fix
     *                                                   helper first)</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init: the input is untyped, so force a string first
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            // drop invalid UTF-8 sequences before the case conversion
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length === true) {
            $str = self::fixStrCaseHelper($str, false);
        }

        // shortcut: no language and plain UTF-8 can be converted by "mb_" directly
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // load the list of known transliterators only once
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Upper';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl is available here, only the requested language is unknown:
                    // report it like strtolower() does and use the generic transliterator
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    $language_code = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($language_code, $str);
            }

            // a language was requested, but the transliterator from intl is missing
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
10123
10124
    /**
     * Translate characters or replace sub-strings.
     *
     * @see http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from.</p>
     * @param string|string[] $to   [optional] <p>The string being translated to.</p>
     *
     * @throws \InvalidArgumentException if "from" and "to" cannot be combined into replace-pairs
     *
     * @return string
     *                <p>This function returns a copy of str, translating all occurrences of each character in "from" to the
     *                corresponding character in "to".</p>
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // translating something into itself is a no-op
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            // keep the split input in separate variables, so that it is still
            // available for the error message if the combination fails
            $from_chars = self::str_split($from);
            $to_chars = self::str_split($to);
            $count_from = \count($from_chars);
            $count_to = \count($to_chars);

            // cut the longer list down to the size of the shorter one
            if ($count_from > $count_to) {
                $from_chars = \array_slice($from_chars, 0, $count_to);
            } elseif ($count_from < $count_to) {
                $to_chars = \array_slice($to_chars, 0, $count_from);
            }

            $replace_pairs = \array_combine($from_chars, $to_chars);
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            if ($replace_pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from_chars, true) . ' | to: ' . \print_r($to_chars, true) . ')');
            }

            return \strtr($str, $replace_pairs);
        }

        // no "to": a string "from" is removed from the input ...
        if (\is_string($from)) {
            return \str_replace($from, '', $str);
        }

        // ... everything else is handed to the native function as replace-pairs
        return \strtr($str, $from);
    }
10172
10173
    /**
     * Return the width of a string.
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return int
     *             <p>The width, 0 for an empty string.</p>
     */
    public static function strwidth(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): int {
        if ($str === '') {
            return 0;
        }

        // the two most common encoding names skip the normalization step
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // the regex below works on UTF-8, so convert other encodings first
        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // remove every character matched by the ranges below and count the removals
        $wide_pattern = '/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u';
        $wide_count = 0;
        $narrow_only = (string) \preg_replace($wide_pattern, '', $str, -1, $wide_count);

        // removed characters count twice, the remaining characters once
        $narrow_count = (int) self::strlen($narrow_only, 'UTF-8');

        return ($wide_count << 1) + $narrow_count;
    }
10226
10227
    /**
10228
     * Get part of a string.
10229
     *
10230
     * @see http://php.net/manual/en/function.mb-substr.php
10231
     *
10232
     * @param string $str        <p>The string being checked.</p>
10233
     * @param int    $offset     <p>The first position used in str.</p>
10234
     * @param int    $length     [optional] <p>The maximum length of the returned string.</p>
10235
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
10236
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10237
     *
10238
     * @return false|string
10239
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
10240
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
10241
     *                      characters long, <b>FALSE</b> will be returned.
10242
     */
10243 172
    public static function substr(
10244
        string $str,
10245
        int $offset = 0,
10246
        int $length = null,
10247
        string $encoding = 'UTF-8',
10248
        bool $clean_utf8 = false
10249
    ) {
10250
        // empty string
10251 172
        if ($str === '' || $length === 0) {
10252 8
            return '';
10253
        }
10254
10255 168
        if ($clean_utf8 === true) {
10256
            // iconv and mbstring are not tolerant to invalid encoding
10257
            // further, their behaviour is inconsistent with that of PHP's substr
10258 2
            $str = self::clean($str);
10259
        }
10260
10261
        // whole string
10262 168
        if (!$offset && $length === null) {
10263 7
            return $str;
10264
        }
10265
10266 163
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
10267 19
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
10268
        }
10269
10270
        //
10271
        // fallback via mbstring
10272
        //
10273
10274 163
        if (self::$SUPPORT['mbstring'] === true) {
10275 161
            if ($encoding === 'UTF-8') {
10276 161
                if ($length === null) {
10277 64
                    return \mb_substr($str, $offset);
10278
                }
10279
10280 102
                return \mb_substr($str, $offset, $length);
10281
            }
10282
10283
            return self::substr($str, $offset, $length, $encoding);
10284
        }
10285
10286
        //
10287
        // fallback for binary || ascii only
10288
        //
10289
10290
        if (
10291 4
            $encoding === 'CP850'
10292
            ||
10293 4
            $encoding === 'ASCII'
10294
        ) {
10295
            if ($length === null) {
10296
                return \substr($str, $offset);
10297
            }
10298
10299
            return \substr($str, $offset, $length);
10300
        }
10301
10302
        // otherwise we need the string-length
10303 4
        $str_length = 0;
10304 4
        if ($offset || $length === null) {
10305 4
            $str_length = self::strlen($str, $encoding);
10306
        }
10307
10308
        // e.g.: invalid chars + mbstring not installed
10309 4
        if ($str_length === false) {
10310
            return false;
10311
        }
10312
10313
        // empty string
10314 4
        if ($offset === $str_length && !$length) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $length of type integer|null is loosely compared to false; this is ambiguous if the integer can be 0. You might want to explicitly use === null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
10315
            return '';
10316
        }
10317
10318
        // impossible
10319 4
        if ($offset && $offset > $str_length) {
10320
            return '';
10321
        }
10322
10323 4
        if ($length === null) {
10324 4
            $length = (int) $str_length;
10325
        } else {
10326 2
            $length = (int) $length;
10327
        }
10328
10329
        if (
10330 4
            $encoding !== 'UTF-8'
10331
            &&
10332 4
            self::$SUPPORT['mbstring'] === false
10333
        ) {
10334 2
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
10335
        }
10336
10337
        //
10338
        // fallback via intl
10339
        //
10340
10341
        if (
10342 4
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
10343
            &&
10344 4
            $offset >= 0 // grapheme_substr() can't handle negative offset
10345
            &&
10346 4
            self::$SUPPORT['intl'] === true
10347
        ) {
10348
            $return_tmp = \grapheme_substr($str, $offset, $length);
10349
            if ($return_tmp !== false) {
10350
                return $return_tmp;
10351
            }
10352
        }
10353
10354
        //
10355
        // fallback via iconv
10356
        //
10357
10358
        if (
10359 4
            $length >= 0 // "iconv_substr()" can't handle negative length
10360
            &&
10361 4
            self::$SUPPORT['iconv'] === true
10362
        ) {
10363
            $return_tmp = \iconv_substr($str, $offset, $length);
10364
            if ($return_tmp !== false) {
10365
                return $return_tmp;
10366
            }
10367
        }
10368
10369
        //
10370
        // fallback for ascii only
10371
        //
10372
10373 4
        if (ASCII::is_ascii($str)) {
10374
            return \substr($str, $offset, $length);
10375
        }
10376
10377
        //
10378
        // fallback via vanilla php
10379
        //
10380
10381
        // split to array, and remove invalid characters
10382 4
        $array = self::str_split($str);
10383
10384
        // extract relevant part, and join to make sting again
10385 4
        return \implode('', \array_slice($array, $offset, $length));
10386
    }
10387
10388
    /**
     * Binary safe comparison of two strings from an offset, up to length characters.
     *
     * When an offset or a length is given, $str1 is first reduced to that window and $str2 is
     * reduced to the same number of characters (counted in $encoding); the two pieces are then
     * compared as a whole.
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
     *                                     of the length of the str compared to the length of main_str less the
     *                                     offset.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        // nothing to cut when the whole strings are compared
        if ($offset !== 0 || $length !== null) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }

                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);

                // the window size must be counted in the same encoding that is used to cut $str2
                $window_length = (int) self::strlen($str1, $encoding);
                $str2 = (string) self::substr($str2, 0, $window_length, $encoding);
            }
        }

        if ($case_insensitivity === true) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
10441
10442
    /**
     * Count the number of substring occurrences.
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack   <p>The string to search in.</p>
     * @param string   $needle     <p>The substring to search for.</p>
     * @param int      $offset     [optional] <p>The offset where to start counting.</p>
     * @param int|null $length     [optional] <p>
     *                             The maximum length after the specified offset to search for the
     *                             substring. A length of 0 always yields 0.
     *                             </p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   <p>The number of occurrences, or <strong>false</strong> if the haystack or the needle
     *                   is empty (or the haystack length could not be determined).</p>
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // an empty window cannot contain anything
        if ($length === 0) {
            return 0;
        }

        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // reduce the haystack to the requested window before counting
        $wants_window = $offset !== 0 || ($length !== null && $length > 0);
        if ($wants_window) {
            if ($length === null) {
                $haystack_length = self::strlen($haystack, $encoding);
                if ($haystack_length === false) {
                    return false;
                }

                $length = (int) $haystack_length;
            }

            $haystack = $encoding === 'UTF-8'
                ? (string) \mb_substr($haystack, $offset, $length)
                : (string) \mb_substr($haystack, $offset, $length, $encoding);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_substr_count($haystack, $needle)
                : \mb_substr_count($haystack, $needle, $encoding);
        }

        // no mbstring: count the matches of the literal needle via PCRE in UTF-8 mode
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
10523
10524
    /**
     * Count the number of substring occurrences, working on bytes instead of characters.
     *
     * @param string   $haystack <p>
     *                           The string being checked.
     *                           </p>
     * @param string   $needle   <p>
     *                           The string being found.
     *                           </p>
     * @param int      $offset   [optional] <p>
     *                           The offset where to start counting
     *                           </p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring.
     *                           </p>
     *
     * @return false|int
     *                   <p>The number of times the needle substring occurs in the haystack string;
     *                   0 if the haystack or the needle is empty.</p>
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($needle === '' || $haystack === '') {
            return 0;
        }

        $overload_active = self::$SUPPORT['mbstring_func_overload'] === true;
        $has_window = $offset !== 0 || $length !== null;

        // with function overloading the window is cut by hand, because "mb_substr_count()"
        // below takes neither an offset nor a length
        if ($has_window && $overload_active) {
            if ($length === null) {
                $haystack_length = self::strlen($haystack);
                if ($haystack_length === false) {
                    return false;
                }

                $length = (int) $haystack_length;
            }

            if (
                $length !== 0
                &&
                $offset !== 0
                &&
                ($length + $offset) <= 0
                &&
                Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
            ) {
                return false;
            }

            $window = \substr($haystack, $offset, $length);
            $haystack = $window === false ? '' : (string) $window;
        }

        if ($overload_active) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
        }

        return $length === null
            ? \substr_count($haystack, $needle, $offset)
            : \substr_count($haystack, $needle, $offset, $length);
    }
10601
10602
    /**
     * Returns the number of occurrences of $substring in the given string.
     * The comparison is case-sensitive unless $case_sensitive is set to false.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $substring      <p>The substring to search for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <p>The number of occurrences; 0 if one of the strings is empty.</p>
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $case_sensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($substring === '' || $str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            if (!$case_sensitive) {
                // fold both sides the same way, so that the case no longer matters
                $folded_str = self::strtocasefold($str, true, false, $encoding, null, false);
                $folded_substring = self::strtocasefold($substring, true, false, $encoding, null, false);

                return (int) \mb_substr_count($folded_str, $folded_substring, $encoding);
            }

            return (int) \mb_substr_count($str, $substring, $encoding);
        }

        if (!$case_sensitive) {
            // upper-case both sides, so that the case no longer matters
            $upper_str = \mb_strtoupper($str);
            $upper_substring = \mb_strtoupper($substring);

            return (int) \mb_substr_count($upper_str, $upper_substring);
        }

        return (int) \mb_substr_count($str, $substring);
    }
10647
10648
    /**
     * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it does not start
     *                with the needle.</p>
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // nothing to strip from an empty haystack, nothing to strip with an empty needle
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_istarts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
10672
10673
    /**
     * Get part of a string, processed in bytes.
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first position used in str.</p>
     * @param int|null $length [optional] <p>The maximum length of the returned string; NULL means
     *                         "up to the end of the string".</p>
     *
     * @return false|string
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
     *                      characters long, <b>FALSE</b> will be returned.
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        // leave the length out instead of passing a 32-bit maximum as a stand-in, so that
        // very long strings are not cut off
        if ($length === null) {
            return \substr($str, $offset);
        }

        return \substr($str, $offset, $length);
    }
10704
10705
    /**
     * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it does not end
     *                with the needle.</p>
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // nothing to strip from an empty haystack, nothing to strip with an empty needle
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_iends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // keep everything in front of the suffix
        $keep_length = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $keep_length);
    }
10729
10730
    /**
     * Removes a prefix ($needle) from the start of the string ($haystack).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it does not start
     *                with the needle.</p>
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // nothing to strip from an empty haystack, nothing to strip with an empty needle
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_starts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
10754
10755
    /**
     * Replace text within a portion of a string.
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given, then it will default to strlen(
     *                                     string ); i.e. end the replacing at the end of string. Of course, if
     *                                     length is zero then this function will have the effect of inserting
     *                                     replacement into string at the given start offset.</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \InvalidArgumentException if $offset or $length is an array while $str is a string
     *
     * @return string|string[] The result string is returned. If string is an array then array is returned.
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str) === true) {
            $num = \count($str);

            // the replacement
            if (\is_array($replacement) === true) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset
            if (\is_array($offset) === true) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$value_tmp) {
                    // anything that is not a real integer counts as offset 0
                    $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
                }
                unset($value_tmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length
            if ($length === null) {
                // NOTE(review): a NULL length becomes 0 (= insert) for array input, while the string
                // path below treats NULL as "replace up to the end" - confirm which one is intended.
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length) === true) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$value_tmp_V2) {
                    // anything that is not a real integer is replaced by the number of input strings
                    $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
                }
                unset($value_tmp_V2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call, one element at a time; the encoding is repeated for every element,
            // otherwise each element would silently fall back to the default encoding
            return \array_map(
                [self::class, 'substr_replace'],
                $str,
                $replacement,
                $offset,
                $length,
                \array_fill(0, $num, $encoding)
            );
        }

        if (\is_array($replacement) === true) {
            // only the first replacement is used for a single input string
            if (\count($replacement) > 0) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length) === true) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset) === true) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        // "array_map()" pads shorter offset / length arrays with NULL, so make sure that the
        // arithmetic and the "mb_substr()" calls below always see integers
        $offset = (int) $offset;
        if ($length !== null) {
            $length = (int) $length;
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // translate a negative offset and clamp the offset into the string
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // translate a negative length and clamp the length into the string
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // part in front of the window + replacement + part behind the window
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into characters, so that the replacement can be done on arrays
        \preg_match_all('/./us', $str, $str_matches);
        \preg_match_all('/./us', $replacement, $replacement_matches);

        if ($length === null) {
            $length_tmp = self::strlen($str, $encoding);
            if ($length_tmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $length_tmp;
        }

        \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);

        return \implode('', $str_matches[0]);
    }
10905
10906
    /**
     * Removes a suffix ($needle) from the end of the string ($haystack).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it does not end
     *                with the needle.</p>
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to strip from an empty haystack, nothing to strip with an empty needle
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // the suffix check itself is done on the raw bytes
        $tail = \substr($haystack, -\strlen($needle));
        if ($tail !== $needle) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            $keep_length = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keep_length);
        }

        $keep_length = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $keep_length, $encoding);
    }
10947
10948
    /**
     * Returns a case swapped version of the string.
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string each character's case swapped
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            $lower = \mb_strtolower($str);
            $upper = \mb_strtoupper($str);
        } else {
            $lower = self::strtolower($str, $encoding);
            $upper = self::strtoupper($str, $encoding);
        }

        // Fast path: "lower ^ upper ^ original" flips the case byte by byte. This only works while
        // all three strings have the same byte length, because XOR on strings stops at the end of
        // the shortest operand.
        $byte_length = \strlen($str);
        if (\strlen($lower) === $byte_length && \strlen($upper) === $byte_length) {
            return (string) ($lower ^ $upper ^ $str);
        }

        // Slow path: at least one case mapping changed the byte length (e.g. "ſ" -> "S"), so the
        // case is swapped character by character instead.
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $swapped = '';
        $char_count = (int) \mb_strlen($str, $encoding);
        for ($index = 0; $index < $char_count; ++$index) {
            $char = (string) \mb_substr($str, $index, 1, $encoding);
            $char_lower = \mb_strtolower($char, $encoding);

            // a character that is already lower-case gets upper-cased, everything else lower-cased
            $swapped .= $char_lower === $char ? \mb_strtoupper($char, $encoding) : $char_lower;
        }

        return $swapped;
    }
10975
10976
    /**
     * Checks whether symfony-polyfills are used.
     *
     * A polyfill is assumed when the "mbstring" or the "iconv" extension is not loaded, but one of
     * its functions ("mb_strlen" / "iconv") exists anyway.
     *
     * @return bool
     *              <strong>true</strong> if in use, <strong>false</strong> otherwise
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstring_is_polyfilled = \extension_loaded('mbstring') === false
                                  &&
                                  \function_exists('mb_strlen');

        $iconv_is_polyfilled = \extension_loaded('iconv') === false
                               &&
                               \function_exists('iconv');

        return $mbstring_is_polyfilled || $iconv_is_polyfilled;
    }
10999
11000
    /**
     * Replaces every tab character in the string with $tab_length spaces.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length [optional] <p>Number of spaces that replace one tab. Default: 4</p>
     *
     * @return string
     */
    public static function tabs_to_spaces(string $str, int $tab_length = 4): string
    {
        $indentation = \str_repeat(' ', $tab_length);

        return \str_replace("\t", $indentation, $str);
    }
11018
11019
    /**
     * Converts the first character of each word in the string to uppercase
     * and all other chars to lowercase.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>A string with all characters of $str being title-cased.</p>
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        // language specific rules and length preservation are delegated to "str_titleize()"
        $needs_titleize = $lang !== null || $try_to_keep_the_string_length !== false;
        if ($needs_titleize) {
            return self::str_titleize(
                $str,
                null,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length,
                false
            );
        }

        if ($encoding === 'UTF-8') {
            return \mb_convert_case($str, \MB_CASE_TITLE);
        }

        $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

        return \mb_convert_case($str, \MB_CASE_TITLE, $normalized_encoding);
    }
11065
11066
    /**
     * Alias for "UTF8::to_ascii()": all arguments are passed on unchanged.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $subst_chr [optional] <p>Passed on as the second argument of "UTF8::to_ascii()".</p>
     * @param bool   $strict    [optional] <p>Passed on as the third argument of "UTF8::to_ascii()".</p>
     *
     * @return string
     *
     * @see UTF8::to_ascii()
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
     */
    public static function toAscii(
        string $str,
        string $subst_chr = '?',
        bool $strict = false
    ): string {
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
11085
11086
    /**
     * Alias for "UTF8::to_iso8859()": the argument is passed on unchanged.
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *
     * @see UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function toIso8859($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
11100
11101
    /**
     * Alias for "UTF8::to_latin1()": the argument is passed on unchanged.
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *
     * @see UTF8::to_latin1()
     * @deprecated <p>please use "UTF8::to_latin1()"</p>
     */
    public static function toLatin1($str)
    {
        $converted = self::to_latin1($str);

        return $converted;
    }
11115
11116
    /**
     * Deprecated camelCase alias that simply forwards to "UTF8::to_utf8()"
     * (the html-entity decoding flag keeps its default value).
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     *
     * @see UTF8::to_utf8()
     * @deprecated <p>please use "UTF8::to_utf8()"</p>
     */
    public static function toUTF8($str)
    {
        $converted = self::to_utf8($str);

        return $converted;
    }
11130
11131
    /**
     * Convert a string into ASCII by delegating to "ASCII::to_transliterate()".
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character to use if a character is unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @return string
     */
    public static function to_ascii(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        $transliterated = ASCII::to_transliterate($str, $unknown, $strict);

        return $transliterated;
    }
11148
11149
    /**
     * Interpret a value as a boolean.
     *
     * The value is cast to string first. Evaluation order:
     * 1. the empty string is false
     * 2. "true", "1", "on", "yes" are true and "false", "0", "off", "no" are false (letter case is ignored)
     * 3. numeric strings are true only if they are greater than zero
     * 4. everything else is true unless it is empty (or "0") after trimming
     *
     * @see http://php.net/manual/en/filter.filters.validate.php
     *
     * @param mixed $str
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        $value = (string) $str;
        if ($value === '') {
            return false;
        }

        // all keywords are lowercase, so one case-folded lookup covers the exact and the case-insensitive match
        $keyword = \strtolower($value);

        if (\in_array($keyword, ['true', '1', 'on', 'yes'], true)) {
            return true;
        }

        if (\in_array($keyword, ['false', '0', 'off', 'no'], true)) {
            return false;
        }

        if (\is_numeric($value)) {
            return (float) $value > 0;
        }

        // same result as a plain (bool) cast: only '' and '0' are falsy strings
        return !\in_array(\trim($value), ['', '0'], true);
    }
11190
11191
    /**
     * Convert given string to safe filename (and keep string case); delegates to "ASCII::to_filename()".
     *
     * @param string $str
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
     *                                  simply replaced with hyphen.
     * @param string $fallback_char
     *
     * @return string
     */
    public static function to_filename(
        string $str,
        bool $use_transliterate = false,
        string $fallback_char = '-'
    ): string {
        $filename = ASCII::to_filename($str, $use_transliterate, $fallback_char);

        return $filename;
    }
11212
11213
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * Arrays are converted element by element (recursively), keeping their keys;
     * scalar input is cast to string and passed through "UTF8::utf8_decode()".
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str) === true) {
            $converted = [];
            foreach ($str as $key => $value) {
                $converted[$key] = self::to_iso8859($value);
            }

            return $converted;
        }

        $text = (string) $str;

        return $text === '' ? '' : self::utf8_decode($text);
    }
11237
11238
    /**
     * Alias for "UTF8::to_iso8859()".
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     *
     * @see UTF8::to_iso8859()
     */
    public static function to_latin1($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
11251
11252
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decodes unicode escape sequences (\uXXXX, including surrogate pairs) and, on request, html-entities.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you may need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * @param string|string[] $str                        <p>Any string or array.</p>
     * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @return string|string[] the UTF-8 encoded string
     */
    public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
    {
        if (\is_array($str) === true) {
            $converted = [];
            foreach ($str as $key => $value) {
                $converted[$key] = self::to_utf8($value, $decode_html_entity_to_utf8);
            }

            return $converted;
        }

        $str = (string) $str;
        if ($str === '') {
            return $str;
        }

        $length = \strlen($str);
        $result = '';
        $pos = 0;

        while ($pos < $length) {
            $lead = $str[$pos];
            $lead_ord = \ord($lead);

            // plain 7-bit byte: it doesn't need conversion
            if ($lead_ord < 0x80) {
                $result .= $lead;
                ++$pos;

                continue;
            }

            // how many continuation bytes would a UTF-8 sequence with this lead byte have?
            // (0 === stray continuation byte 0x80-0xBF or 0xF8-0xFF: always converted)
            if ($lead_ord >= 0xC0 && $lead_ord <= 0xDF) {
                $tail_length = 1;
            } elseif ($lead_ord >= 0xE0 && $lead_ord <= 0xEF) {
                $tail_length = 2;
            } elseif ($lead_ord >= 0xF0 && $lead_ord <= 0xF7) {
                $tail_length = 3;
            } else {
                $tail_length = 0;
            }

            // every expected continuation byte must exist and be in the range 0x80-0xBF
            $looks_like_utf8 = $tail_length > 0;
            for ($offset = 1; $looks_like_utf8 && $offset <= $tail_length; ++$offset) {
                $next = $pos + $offset;
                if ($next >= $length) {
                    $looks_like_utf8 = false;

                    break;
                }

                $next_ord = \ord($str[$next]);
                if ($next_ord < 0x80 || $next_ord > 0xBF) {
                    $looks_like_utf8 = false;
                }
            }

            if ($looks_like_utf8) {
                // yeah, almost sure it's UTF8 already: copy the whole sequence untouched
                $result .= \substr($str, $pos, $tail_length + 1);
                $pos += $tail_length + 1;
            } else {
                // not valid UTF8 - convert this single byte
                $result .= self::to_utf8_convert_helper($lead);
                ++$pos;
            }
        }

        // decode unicode escape sequences + unicode surrogate pairs
        $result = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // single "\uXXXX" escape
                    $code_point = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair, see http://unicode.org/faq/utf_bom.html#utf16-4
                    $high = (int) \hexdec($matches[1]);
                    $low = (int) \hexdec($matches[2]);
                    $code_point = 0x10000 + (($high - 0xD800) << 10) + ($low - 0xDC00);
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($code_point < 0x80) {
                    return (string) self::chr($code_point);
                }

                if ($code_point < 0xA0) {
                    $first = (string) self::chr(0xC0 | ($code_point >> 6));
                    $second = (string) self::chr(0x80 | ($code_point & 0x3F));

                    return $first . $second;
                }

                return self::decimal_to_chr($code_point);
            },
            $result
        );

        // "preg_replace_callback()" returns null on error
        if ($result === null) {
            return '';
        }

        // decode html-entities
        if ($decode_html_entity_to_utf8 === true) {
            $result = self::html_entity_decode($result);
        }

        return $result;
    }
11385
11386
    /**
     * Strip whitespace or other characters from beginning or end of a UTF-8 string.
     *
     * INFO: This is slower than "trim()"
     *
     * We can only use the original function if the string / chars are 7-bit only,
     * but the check for ASCII (7-bit) costs more time than we can save here.
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars [optional] <p>Optional characters to be stripped; null or '' means
     *                           "strip whitespace".</p>
     *
     * @return string the trimmed string
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Compare explicitly instead of relying on truthiness: the char list "0" is falsy in PHP
        // and would otherwise silently fall back to whitespace trimming.
        if ($chars !== null && $chars !== '') {
            $chars = \preg_quote($chars, '/');
            $pattern = '^[' . $chars . ']+|[' . $chars . ']+$';
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // fallback without mbstring
        return self::regex_replace($str, $pattern, '', '', '/');
    }
11419
11420
    /**
     * Makes string's first char uppercase.
     *
     * The plain "mb_*" functions are used as long as neither a language nor the
     * "keep the string length" option is requested; otherwise "UTF8::strtoupper()" handles the first char.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            // invalid characters would shift the character positions used below
            $str = self::clean($str);
        }

        $plain_mb_is_enough = $lang === null && $try_to_keep_the_string_length === false;

        if ($encoding === 'UTF-8') {
            $head = (string) \mb_substr($str, 0, 1);
            $tail = (string) \mb_substr($str, 1);

            if ($plain_mb_is_enough === true) {
                $head = \mb_strtoupper($head);
            } else {
                $head = self::strtoupper(
                    $head,
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            }

            return $head . $tail;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $tail = (string) self::substr($str, 1, null, $encoding);

        if ($plain_mb_is_enough === true) {
            $head = \mb_strtoupper(
                (string) \mb_substr($str, 0, 1, $encoding),
                $encoding
            );
        } else {
            $head = self::strtoupper(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );
        }

        return $head . $tail;
    }
11489
11490
    /**
     * Alias for "UTF8::ucfirst()" (language and string-length options keep their defaults).
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Forwarded to "UTF8::ucfirst()".</p>
     * @param bool   $clean_utf8 [optional] <p>Forwarded to "UTF8::ucfirst()".</p>
     *
     * @return string
     *
     * @see UTF8::ucfirst()
     */
    public static function ucword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        $result = self::ucfirst($str, $encoding, $clean_utf8);

        return $result;
    }
11508
11509
    /**
     * Uppercase for all words in the string.
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
     * @param string   $char_list  [optional] <p>Additional chars that belong to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // Only the empty string short-circuits: a truthiness check would also swallow the input "0".
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE doesn't only uppercase the first letter, it also lowercases all other letters

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // without extra word-chars and exceptions, pure ASCII input can use the native function
        $use_php_default_functions = !(bool) ($char_list . \implode('', $exceptions));

        if (
            $use_php_default_functions === true
            &&
            ASCII::is_ascii($str) === true
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = \count($exceptions) > 0;

        // iterate by value and write back by index, so no dangling reference is left behind
        foreach ($words as $index => $word) {
            if (!$word) {
                continue;
            }

            if (
                $use_exceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words[$index] = self::ucfirst($word, $encoding);
            }
        }

        return \implode('', $words);
    }
11570
11571
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                    => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible (until the string no longer changes).</p>
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // without any of these markers there is nothing to url-/entity-/escape-decode
        $has_encoded_parts = false;
        foreach (['&', '%', '+', '\u'] as $marker) {
            if (\strpos($str, $marker) !== false) {
                $has_encoded_parts = true;

                break;
            }
        }

        if ($has_encoded_parts === false) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        do {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $entities_decoded = self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            );

            $str = self::fix_simple_utf8(\urldecode($entities_decoded));
        } while ($multi_decode === true && $previous !== $str);

        return $str;
    }
11628
11629
    /**
     * Returns an array that maps "urlencoded" win1252 bytes ("%20" - "%FF") to UTF-8 strings.
     *
     * @return string[]
     *
     * @deprecated <p>please use the "UTF8::urldecode()" function to decode a string</p>
     */
    public static function urldecode_fix_win1252_chars(): array
    {
        // "%20" - "%7E": printable ASCII, the value is simply the character itself
        $table = [];
        for ($byte = 0x20; $byte <= 0x7E; ++$byte) {
            $table[\sprintf('%%%02X', $byte)] = \chr($byte);
        }

        // "%7F" - "%FF": fixed replacements
        return $table + [
            '%7F' => '',
            '%80' => '`',
            '%81' => '',
            '%82' => '‚',
            '%83' => 'ƒ',
            '%84' => '„',
            '%85' => '…',
            '%86' => '†',
            '%87' => '‡',
            '%88' => 'ˆ',
            '%89' => '‰',
            '%8A' => 'Š',
            '%8B' => '‹',
            '%8C' => 'Œ',
            '%8D' => '',
            '%8E' => 'Ž',
            '%8F' => '',
            '%90' => '',
            '%91' => '‘',
            '%92' => '’',
            '%93' => '“',
            '%94' => '”',
            '%95' => '•',
            '%96' => '–',
            '%97' => '—',
            '%98' => '˜',
            '%99' => '™',
            '%9A' => 'š',
            '%9B' => '›',
            '%9C' => 'œ',
            '%9D' => '',
            '%9E' => 'ž',
            '%9F' => 'Ÿ',
            '%A0' => '',
            '%A1' => '¡',
            '%A2' => '¢',
            '%A3' => '£',
            '%A4' => '¤',
            '%A5' => '¥',
            '%A6' => '¦',
            '%A7' => '§',
            '%A8' => '¨',
            '%A9' => '©',
            '%AA' => 'ª',
            '%AB' => '«',
            '%AC' => '¬',
            '%AD' => '',
            '%AE' => '®',
            '%AF' => '¯',
            '%B0' => '°',
            '%B1' => '±',
            '%B2' => '²',
            '%B3' => '³',
            '%B4' => '´',
            '%B5' => 'µ',
            '%B6' => '¶',
            '%B7' => '·',
            '%B8' => '¸',
            '%B9' => '¹',
            '%BA' => 'º',
            '%BB' => '»',
            '%BC' => '¼',
            '%BD' => '½',
            '%BE' => '¾',
            '%BF' => '¿',
            '%C0' => 'À',
            '%C1' => 'Á',
            '%C2' => 'Â',
            '%C3' => 'Ã',
            '%C4' => 'Ä',
            '%C5' => 'Å',
            '%C6' => 'Æ',
            '%C7' => 'Ç',
            '%C8' => 'È',
            '%C9' => 'É',
            '%CA' => 'Ê',
            '%CB' => 'Ë',
            '%CC' => 'Ì',
            '%CD' => 'Í',
            '%CE' => 'Î',
            '%CF' => 'Ï',
            '%D0' => 'Ð',
            '%D1' => 'Ñ',
            '%D2' => 'Ò',
            '%D3' => 'Ó',
            '%D4' => 'Ô',
            '%D5' => 'Õ',
            '%D6' => 'Ö',
            '%D7' => '×',
            '%D8' => 'Ø',
            '%D9' => 'Ù',
            '%DA' => 'Ú',
            '%DB' => 'Û',
            '%DC' => 'Ü',
            '%DD' => 'Ý',
            '%DE' => 'Þ',
            '%DF' => 'ß',
            '%E0' => 'à',
            '%E1' => 'á',
            '%E2' => 'â',
            '%E3' => 'ã',
            '%E4' => 'ä',
            '%E5' => 'å',
            '%E6' => 'æ',
            '%E7' => 'ç',
            '%E8' => 'è',
            '%E9' => 'é',
            '%EA' => 'ê',
            '%EB' => 'ë',
            '%EC' => 'ì',
            '%ED' => 'í',
            '%EE' => 'î',
            '%EF' => 'ï',
            '%F0' => 'ð',
            '%F1' => 'ñ',
            '%F2' => 'ò',
            '%F3' => 'ó',
            '%F4' => 'ô',
            '%F5' => 'õ',
            '%F6' => 'ö',
            '%F7' => '÷',
            '%F8' => 'ø',
            '%F9' => 'ù',
            '%FA' => 'ú',
            '%FB' => 'û',
            '%FC' => 'ü',
            '%FD' => 'ý',
            '%FE' => 'þ',
            '%FF' => 'ÿ',
        ];
    }
11865
11866
    /**
     * Decodes an UTF-8 string to ISO-8859-1.
     *
     * The string is rewritten in place, byte by byte: 2-byte sequences whose code point is below 256
     * become the matching single byte, everything else that is multi-byte becomes "?".
     *
     * @param string $str             <p>The input string.</p>
     * @param bool   $keep_utf8_chars <p>true === return the untouched input if decoding did not make the
     *                                string shorter (measured with "UTF8::strlen()").</p>
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // save for later comparison
        $str_backup = $str;
        $len = \strlen($str);

        // lazy-load the lookup tables (presumably byte => int and int => byte; see "getData()")
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $no_char_found = '?';
        // $i is the read position, $j the write position ($j never overtakes $i)
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    // A lead byte at the very end of the string has no continuation byte:
                    // emit the replacement char instead of reading past the end of the string.
                    if ($i + 1 >= $len) {
                        $str[$j] = $no_char_found;

                        break;
                    }

                    // 2-byte sequence: combine the payload bits of both bytes
                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $no_char_found;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // 4-byte sequence: skip one byte more than the 3-byte case below
                    ++$i;

                // no break

                case "\xE0":
                    // 3-byte (and 4-byte) sequences cannot be represented: replace and skip the rest
                    $str[$j] = $no_char_found;
                    $i += 2;

                    break;

                default:
                    // single byte: copy as-is
                    $str[$j] = $str[$i];
            }
        }

        // only the first $j bytes hold decoded output
        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keep_utf8_chars === true
            &&
            self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
11935
11936
    /**
     * Encodes an ISO-8859-1 string to UTF-8 (wrapper around the native "utf8_encode()").
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the encoded string, or an empty string if the conversion reported a failure
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $encoded = \utf8_encode($str);

        // the polyfill maybe return false
        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        return $encoded === false ? '' : $encoded;
    }
11960
11961
    /**
     * Deprecated alias that simply forwards to "UTF8::fix_simple_utf8()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *
     * @deprecated <p>please use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
11974
11975
    /**
     * Returns the class-level table of utf8 whitespace characters.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @return string[]
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
     *                  as defined in above URL
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
11988
11989
    /**
     * Limit the number of words in a string.
     *
     * A "word" is a run of non-whitespace characters. If the string has to be cut,
     * trailing whitespace is removed before the add-on is appended.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $limit      <p>The limit of words as integer; values below 1 yield an empty string.</p>
     * @param string $str_add_on <p>Replacement for the stripped part of the string.</p>
     *
     * @return string
     */
    public static function words_limit(
        string $str,
        int $limit = 100,
        string $str_add_on = '…'
    ): string {
        if ($limit < 1 || $str === '') {
            return '';
        }

        // leading whitespace + up to $limit words (each with its trailing whitespace)
        $pattern = '/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u';
        \preg_match($pattern, $str, $matches);

        $head = $matches[0] ?? null;

        // no match at all, or the match already covers the whole string: nothing to cut
        if ($head === null || \mb_strlen($str) === (int) \mb_strlen($head)) {
            return $str;
        }

        return \rtrim($head) . $str_add_on;
    }
12019
12020
    /**
     * Wraps a string to a given number of characters
     *
     * The wrapping positions are computed by the native "wordwrap()" on a
     * one-byte-per-character mask of the input, and then applied to the real
     * (possibly multi-byte) characters.
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @return string
     *                <p>The given string wrapped at the specified column; an empty string
     *                if $str or $break is empty.</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($str === '' || $break === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }

        // $chars collects the real characters (plus the existing breaks), while
        // $mask mirrors them with exactly one byte each:
        // ' ' for a space, '#' for an existing break and '?' for anything else.
        $chars = [];
        $mask = '';
        foreach ($segments as $segment_index => $segment) {
            if ($segment_index) {
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        // Let the native function decide where to break, marking every break with '#'.
        $mask = \wordwrap($mask, $width, '#', $cut);
        $mask_length = \mb_strlen($mask);

        $wrapped = '';
        $char_pos = 0;
        $mask_pos = -1;
        $break_pos = -1;
        while (($break_pos = \mb_strpos($mask, '#', $break_pos + 1)) !== false) {
            // Copy the characters that precede the current break marker.
            for (++$mask_pos; $mask_pos < $break_pos; ++$mask_pos) {
                $wrapped .= $chars[$char_pos];
                unset($chars[$char_pos++]);

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($mask_pos > $mask_length) {
                    break 2;
                }
            }

            // A marker that stands for an original break or a space consumes that
            // character; a marker inserted by a forced cut consumes nothing.
            if ($chars[$char_pos] === $break || $chars[$char_pos] === ' ') {
                unset($chars[$char_pos++]);
            }

            $wrapped .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($break_pos > $mask_length) {
                break;
            }
        }

        // Whatever was not consumed forms the last line.
        return $wrapped . \implode('', $chars);
    }
12106
12107
    /**
     * Line-Wrap the string after $limit, but split the string by "$delimiter" before ...
     *    ... so that we wrap the per line.
     *
     * Each piece is wrapped on its own via "UTF8::wordwrap()" and the pieces are
     * joined again with $delimiter (or "\n" when no delimiter was given).
     *
     * @param string      $str             <p>The input string.</p>
     * @param int         $width           [optional] <p>The column width.</p>
     * @param string      $break           [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool        $cut             [optional] <p>
     *                                     If the cut is set to true, the string is
     *                                     always wrapped at or before the specified width. So if you have
     *                                     a word that is larger than the given width, it is broken apart.
     *                                     </p>
     * @param bool        $add_final_break [optional] <p>
     *                                     If this flag is true, then the method will add a $break at the end
     *                                     of the result string.
     *                                     </p>
     * @param string|null $delimiter       [optional] <p>
     *                                     You can change the default behavior, where we split the string by newline.
     *                                     </p>
     *
     * @return string
     */
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $add_final_break = true,
        string $delimiter = null
    ): string {
        // Without an explicit delimiter, split on any common newline sequence.
        $pieces = $delimiter === null
            ? \preg_split('/\\r\\n|\\r|\\n/', $str)
            : \explode($delimiter, $str);

        // A failed split is treated like "no pieces".
        if ($pieces === false) {
            $pieces = [];
        }

        $wrapped_pieces = [];
        foreach ($pieces as $piece) {
            $wrapped_pieces[] = self::wordwrap($piece, $width, $break, $cut);
        }

        $glue = $delimiter === null ? "\n" : $delimiter;

        return \implode($glue, $wrapped_pieces) . ($add_final_break ? $break : '');
    }
12158
12159
    /**
     * Exposes the class-level list of Unicode White Space characters.
     *
     * @return string[] an array with numeric code point as key and White Space Character as value
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
12168
12169
    /**
     * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
     *
     * The string is scanned octet by octet. It is rejected on an illegal lead
     * octet, a missing continuation octet, a non-shortest form, a 5 or 6 octet
     * sequence, a surrogate, a code point above 0x10FFFF, or when the string
     * ends in the middle of a multi-octet sequence.
     *
     * @see http://hsivonen.iki.fi/php-utf8/
     *
     * @param string $str    <p>The string to be checked.</p>
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     */
    private static function is_utf8_string(string $str, bool $strict = false): bool
    {
        if ($str === '') {
            return true;
        }

        if ($strict === true && self::is_binary($str, true)) {
            if (self::is_utf16($str, false) !== false) {
                return false;
            }

            if (self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        // NOTE(review): this branch runs a "/u" pattern exactly when
        // "pcre_utf8_support()" did NOT report true, which looks inverted
        // relative to the comment below - confirm the intent before changing it.
        if (self::pcre_utf8_support() !== true) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^./us', $str) === 1;
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $pending = 0; // number of continuation octets still expected for the current sequence
        $code_point = 0; // code point assembled so far
        $sequence_length = 1; // total number of octets announced by the current lead octet

        $byte_count = \strlen($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($pos = 0; $pos < $byte_count; ++$pos) {
            $byte = self::$ORD[$str[$pos]];

            if ($pending === 0) {
                // At a sequence boundary we expect either a US-ASCII character
                // or the first octet of a multi-octet sequence.
                if (($byte & 0x80) === 0) {
                    // US-ASCII, pass straight through.
                    $sequence_length = 1;
                } elseif (($byte & 0xE0) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $code_point = ($byte & 0x1F) << 6;
                    $pending = 1;
                    $sequence_length = 2;
                } elseif (($byte & 0xF0) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $code_point = ($byte & 0x0F) << 12;
                    $pending = 2;
                    $sequence_length = 3;
                } elseif (($byte & 0xF8) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $code_point = ($byte & 0x07) << 18;
                    $pending = 3;
                    $sequence_length = 4;
                } elseif (($byte & 0xFC) === 0xF8) {
                    // First octet of 5 octet sequence: always illegal, but we
                    // carry on until the end of the sequence and let the
                    // validity check below reject it instead of resynchronizing.
                    $code_point = ($byte & 0x03) << 24;
                    $pending = 4;
                    $sequence_length = 5;
                } elseif (($byte & 0xFE) === 0xFC) {
                    // First octet of 6 octet sequence, handled like the 5 octet case.
                    $code_point = ($byte & 1) << 30;
                    $pending = 5;
                    $sequence_length = 6;
                } else {
                    // Neither US-ASCII nor a legal first octet of a multi-octet sequence.
                    return false;
                }

                continue;
            }

            // Inside a sequence only a continuation octet (10xxxxxx) is legal.
            if (($byte & 0xC0) !== 0x80) {
                return false;
            }

            --$pending;
            $code_point |= ($byte & 0x3F) << ($pending * 6);

            if ($pending !== 0) {
                continue;
            }

            // The sequence is complete: check for illegal sequences and code points.
            if (
                // From Unicode 3.1, non-shortest form is illegal
                ($sequence_length === 2 && $code_point < 0x0080)
                ||
                ($sequence_length === 3 && $code_point < 0x0800)
                ||
                ($sequence_length === 4 && $code_point < 0x10000)
                ||
                ($sequence_length > 4)
                ||
                // From Unicode 3.2, surrogate characters are illegal.
                (($code_point & 0xFFFFF800) === 0xD800)
                ||
                // Code points outside the Unicode range are illegal.
                ($code_point > 0x10FFFF)
            ) {
                return false;
            }

            // Reset the decoder for the next character.
            $code_point = 0;
            $sequence_length = 1;
        }

        // A string that ends while continuation octets are still expected
        // (e.g. a lone lead octet at the very end) is truncated and therefore
        // not valid UTF-8.
        return $pending === 0;
    }
12314
12315
    /**
     * Applies the case-fold replacement tables to a string.
     *
     * The common table is always applied; the full table (loaded once from the
     * "caseFolding_full" data file) is applied on top when requested.
     *
     * @param string $str
     * @param bool   $use_lowercase      <p>Use uppercase by default, otherwise use lowercase.</p>
     * @param bool   $use_full_case_fold <p>Convert not only common cases.</p>
     *
     * @return string
     */
    private static function fixStrCaseHelper(
        string $str,
        $use_lowercase = false,
        $use_full_case_fold = false
    ): string {
        $to_lowercase = $use_lowercase === true;

        $common_upper = self::$COMMON_CASE_FOLD['upper'];
        $common_lower = self::$COMMON_CASE_FOLD['lower'];

        $str = $to_lowercase
            ? \str_replace($common_upper, $common_lower, $str)
            : \str_replace($common_lower, $common_upper, $str);

        if (!$use_full_case_fold) {
            return $str;
        }

        // The full table is loaded lazily and kept for all later calls.
        static $FULL_TABLE = null;
        if ($FULL_TABLE === null) {
            $FULL_TABLE = self::getData('caseFolding_full');
        }

        // Entry 0 is replaced by entry 1 when lowercasing, and the other way round otherwise.
        if ($to_lowercase) {
            return \str_replace($FULL_TABLE[0], $FULL_TABLE[1], $str);
        }

        return \str_replace($FULL_TABLE[1], $FULL_TABLE[0], $str);
    }
12359
12360
    /**
     * Loads a data table by including "/data/<file>.php" relative to this file.
     *
     * @param string $file <p>The data file name without the ".php" extension.</p>
     *
     * @return array
     *               <p>The value returned by the included file.</p>
     */
    private static function getData(string $file): array
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
12374
12375
    /**
     * Builds the emoji lookup caches on first use.
     *
     * The emoji table is sorted by key length (longest first) and then split
     * into a key list, a value list and a list of reversible placeholder tokens.
     *
     * @return true|null
     *                   <p>true when the caches were built by this call, null when they already existed.</p>
     */
    private static function initEmojiData()
    {
        // The caches are already in place: nothing to do.
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        // Order the table by key length, longest keys first.
        \uksort(
            self::$EMOJI,
            static function (string $left, string $right): int {
                return \strlen($right) <=> \strlen($left);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = \array_values(self::$EMOJI);

        // One placeholder per key, built from the key's CRC32 and its reversed digits.
        foreach (self::$EMOJI_KEYS_CACHE as $emoji_key) {
            $checksum = (string) \crc32($emoji_key);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev($checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
12405
12406
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * @return bool
     *              <p>true only if the constant "MB_OVERLOAD_STRING" exists and its bit is set
     *              in the "mbstring.func_overload" INI value.</p>
     */
    private static function mbstring_overloaded(): bool
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */

        // Without the constant there is nothing to test the INI value against.
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $func_overload = (int) @\ini_get('mbstring.func_overload');

        return ($func_overload & \MB_OVERLOAD_STRING) !== 0;
    }
12423
12424
    /**
     * Filters a list of strings and returns the survivors re-indexed from 0.
     *
     * @param array    $strings             <p>The strings to filter.</p>
     * @param bool     $remove_empty_values <p>Drop entries that are empty after "trim()".</p>
     * @param int|null $remove_short_values <p>
     *                                      Drop entries whose character length is lower than or equal to
     *                                      this value; null disables the length filter.
     *                                      </p>
     *
     * @return array
     */
    private static function reduce_string_array(
        array $strings,
        bool $remove_empty_values,
        int $remove_short_values = null
    ): array {
        $kept = [];

        foreach ($strings as $candidate) {
            $is_too_short = $remove_short_values !== null
                            &&
                            \mb_strlen($candidate) <= $remove_short_values;
            if ($is_too_short) {
                continue;
            }

            $is_blank = $remove_empty_values === true
                        &&
                        \trim($candidate) === '';
            if ($is_blank) {
                continue;
            }

            $kept[] = $candidate;
        }

        return $kept;
    }
12461
12462
    /**
     * Builds a regular-expression fragment that matches any character of $s
     * (plus the raw class content passed in $class).
     *
     * Characters of $s are merged into one bracket expression; elements returned
     * by "UTF8::str_split()" that are longer than one character are added as
     * separate alternatives. Results are memoized per ($s, $class) pair.
     *
     * @param string $s     <p>The characters to match; each one is regex-quoted where needed.</p>
     * @param string $class <p>Raw content that is placed, unquoted, at the start of the bracket expression.</p>
     *
     * @return string
     *                <p>Either a single fragment or a non-capturing group of alternatives.</p>
     */
    private static function rxClass(string $s, string $class = ''): string
    {
        static $RX_CLASS_CACHE = [];

        // Prefix the key with the byte length of $s so that different
        // ($s, $class) pairs with the same concatenation cannot collide.
        $cache_key = \strlen($s) . ':' . $s . $class;

        if (isset($RX_CLASS_CACHE[$cache_key])) {
            return $RX_CLASS_CACHE[$cache_key];
        }

        $class_array = [$class];

        // Iterate by value with a dedicated variable: the result of a function
        // call is no valid reference target, and $s must not be overwritten.
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // A hyphen has to come first inside a bracket expression to be literal.
                $class_array[0] = '-' . $class_array[0];
            } elseif (!isset($char[2])) {
                // At most two bytes long: quote it for use inside the pattern.
                $class_array[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                // One multi-byte character: usable inside the bracket expression as is.
                $class_array[0] .= $char;
            } else {
                // More than one character: needs its own alternative.
                $class_array[] = $char;
            }
        }

        if ($class_array[0]) {
            $class_array[0] = '[' . $class_array[0] . ']';
        }

        if (\count($class_array) === 1) {
            $return = $class_array[0];
        } else {
            $return = '(?:' . \implode('|', $class_array) . ')';
        }

        $RX_CLASS_CACHE[$cache_key] = $return;

        return $return;
    }
12510
12511
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * The input is split by $delimiter and every part gets its first character
     * upper-cased, except for parts that are a listed particle (e.g. "von") or
     * that start with a listed prefix (e.g. "mc"). When the delimiter is "-",
     * parts that merely start with a listed particle are left untouched as well.
     *
     * @param string $names     <p>The name(s) to process.</p>
     * @param string $delimiter <p>The separator used to split and re-join the parts.</p>
     * @param string $encoding  <p>The encoding passed to "UTF8::strpos()".</p>
     *
     * @return string
     */
    private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
    {
        $parts = \explode($delimiter, $names);
        if ($parts === false) {
            return '';
        }

        // Particles that stay lowercase when they form a whole part.
        $lowercase_words = [
            'ab',
            'af',
            'al',
            'and',
            'ap',
            'bint',
            'binte',
            'da',
            'de',
            'del',
            'den',
            'der',
            'di',
            'dit',
            'ibn',
            'la',
            'mac',
            'nic',
            'of',
            'ter',
            'the',
            'und',
            'van',
            'von',
            'y',
            'zu',
        ];

        // Beginnings that keep a part from being touched at all.
        $lowercase_prefixes = [
            'al-',
            "d'",
            'ff',
            "l'",
            'mac',
            'mc',
            'nic',
        ];

        // For hyphen-separated input the particles count as beginnings too.
        $skip_beginnings = $lowercase_prefixes;
        if ($delimiter === '-') {
            $skip_beginnings = \array_merge($lowercase_words, $lowercase_prefixes);
        }

        foreach ($parts as $index => $part) {
            if (\in_array($part, $lowercase_words, true)) {
                continue;
            }

            $has_skip_beginning = false;
            foreach ($skip_beginnings as $beginning) {
                if (self::strpos($part, $beginning, 0, $encoding) === 0) {
                    $has_skip_beginning = true;

                    break;
                }
            }

            if ($has_skip_beginning) {
                continue;
            }

            $parts[$index] = self::ucfirst($part);
        }

        return \implode($delimiter, $parts);
    }
12600
12601
    /**
     * Generic case sensitive transformation for collation matching.
     *
     * The string is normalized to NFD and every run of non-spacing marks
     * ("\p{Mn}") is removed afterwards.
     *
     * @param string $str <p>The input string</p>
     *
     * @return string|null
     *                     <p>The result of "preg_replace()", which is null on a regex error.</p>
     */
    private static function strtonatfold(string $str)
    {
        /** @noinspection PhpUndefinedClassInspection */
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        return \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
12617
12618
    /**
     * Converts one single-byte input character into its UTF-8 representation.
     *
     * Values listed in the "win1252_to_utf8" table are taken from there; every
     * other value is encoded as a two-octet UTF-8 sequence.
     *
     * @param int|string $input <p>The character to convert; used as key into the "ord" lookup table.</p>
     *
     * @return string
     */
    private static function to_utf8_convert_helper($input): string
    {
        // Load the lookup tables on first use.
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];

        // found in Windows-1252 special cases
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) {
            return self::$WIN1252_TO_UTF8[$ordC1];
        }

        // Lead octet: the bits above the low six, combined with the marker 0xC0.
        // The integer shift replaces the former "/ 64", which produced a float
        // array offset (an implicit lossy float-to-int conversion).
        $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";

        // Continuation octet: the low six bits combined with the marker 0x80.
        // Both operators are intentionally bitwise string operations.
        $cc2 = ((string) $input & "\x3F") | "\x80";

        return $cc1 . $cc2;
    }
12652
12653
    /**
     * Rewrites "%uXXXX" escapes (three or four hex digits) into HTML numeric
     * entities of the form "&#xXXXX;".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *                <p>The rewritten string, or the unchanged input if it contains no such escape.</p>
     */
    private static function urldecode_unicode_helper(string $str): string
    {
        $escape_pattern = '/%u([0-9a-fA-F]{3,4})/';

        // Leave the input alone unless at least one escape is present.
        if (!\preg_match($escape_pattern, $str)) {
            return $str;
        }

        return (string) \preg_replace($escape_pattern, '&#x\\1;', $str);
    }
12667
}
12668