Passed
Push — master ( 2a9eb0...6e9275 )
by Lars
03:30
created

UTF8   F

Complexity

Total Complexity 1688

Size/Duplication

Total Lines 12733
Duplicated Lines 0 %

Test Coverage

Coverage 79.9%

Importance

Changes 94
Bugs 51 Features 6
Metric Value
eloc 4321
dl 0
loc 12733
ccs 3049
cts 3816
cp 0.799
rs 0.8
c 94
b 51
f 6
wmc 1688

299 Methods

Rating   Name   Duplication   Size   Complexity  
A add_bom_to_string() 0 7 2
A __construct() 0 2 1
A access() 0 11 4
A mbstring_overloaded() 0 11 2
A chr_to_decimal() 0 30 6
A array_change_key_case() 0 23 5
D chr() 0 101 18
B between() 0 48 8
A chr_map() 0 5 1
A char_at() 0 7 2
A chars() 0 3 1
A chr_size_list() 0 17 3
A checkForSupport() 0 47 4
A chr_to_hex() 0 11 3
A callback() 0 3 1
A binary_to_str() 0 12 3
A bom() 0 3 1
A file_has_bom() 0 8 2
A str_begins() 0 3 1
A max() 0 14 3
B str_camelize() 0 70 10
A parse_str() 0 16 4
A filter_input() 0 13 2
A str_contains() 0 10 2
A get_unique_string() 0 15 2
A is_bom() 0 10 3
A is_hexadecimal() 0 8 2
A encode_mimeheader() 0 25 5
A count_chars() 0 11 1
A ctype_loaded() 0 3 1
A has_uppercase() 0 8 2
A remove_left() 0 24 4
A str_iends_with() 0 11 3
A max_chr_width() 0 8 2
A isBinary() 0 3 1
A ltrim() 0 19 4
A emoji_decode() 0 18 2
A is_utf8() 0 13 4
A remove_html() 0 3 1
A lcword() 0 13 1
A mbstring_loaded() 0 3 1
A html_escape() 0 6 1
C normalize_encoding() 0 134 14
B get_file_type() 0 61 7
A str_ensure_right() 0 13 4
A chr_to_int() 0 3 1
C is_utf16() 0 65 16
A isHtml() 0 3 1
C filter() 0 59 13
A normalize_whitespace() 0 9 1
A isBase64() 0 3 1
A str_humanize() 0 15 1
A is_html() 0 14 2
A decode_mimeheader() 0 15 5
A html_decode() 0 6 1
A isUtf32() 0 3 1
A rtrim() 0 19 4
A regex_replace() 0 20 3
A chunk_split() 0 3 1
A replace_all() 0 11 2
A removeBOM() 0 3 1
A emoji_encode() 0 18 2
A is_alpha() 0 8 2
B get_random_string() 0 56 10
A fix_utf8() 0 30 4
A first_char() 0 14 4
A isUtf8() 0 3 1
A str_iends() 0 3 1
A css_stripe_media_queries() 0 6 1
A clean() 0 47 6
A is_serialized() 0 11 3
A is_uppercase() 0 8 2
A str_contains_all() 0 23 6
A is_ascii() 0 3 1
A normalize_line_ending() 0 3 1
D range() 0 65 23
B rawurldecode() 0 37 8
A str_ends() 0 3 1
A normalize_msword() 0 3 1
C str_detect_encoding() 0 111 13
A spaces_to_tabs() 0 11 3
A is_blank() 0 8 2
D getCharDirection() 0 105 118
A htmlspecialchars() 0 15 3
A replace() 0 11 2
A filter_var_array() 0 12 2
A decimal_to_chr() 0 3 1
A pcre_utf8_support() 0 4 1
A codepoints() 0 29 4
A lowerCaseFirst() 0 13 1
A str_ends_with_any() 0 13 4
A cleanup() 0 25 2
A remove_right() 0 25 4
A remove_html_breaks() 0 3 1
A showSupport() 0 8 2
A remove_invisible_characters() 0 9 1
A single_chr_html_encode() 0 18 4
B is_binary() 0 35 9
A intlChar_loaded() 0 3 1
A lcfirst() 0 44 5
A finfo_loaded() 0 3 1
A str_ends_with() 0 11 3
A fits_inside() 0 3 1
A is_binary_file() 0 16 3
A intl_loaded() 0 3 1
A html_stripe_empty_tags() 0 6 1
A remove_bom() 0 22 5
F extract_text() 0 175 34
A json_loaded() 0 3 1
A isBom() 0 3 1
A int_to_chr() 0 3 1
A is_lowercase() 0 8 2
A hasBom() 0 3 1
A str_ibegins() 0 3 1
A str_capitalize_name() 0 8 1
A iconv_loaded() 0 3 1
A lcwords() 0 31 6
A isAscii() 0 3 1
A normalizeEncoding() 0 3 1
A filter_var() 0 12 2
A is_empty() 0 3 1
B html_encode() 0 53 11
A str_dasherize() 0 3 1
A isUtf16() 0 3 1
A str_ensure_left() 0 11 3
F encode() 0 140 37
C is_utf32() 0 65 16
C ord() 0 72 16
A is_alphanumeric() 0 8 2
A json_decode() 0 14 2
A fix_simple_utf8() 0 19 4
B is_json() 0 29 8
A int_to_hex() 0 7 2
A has_lowercase() 0 8 2
A json_encode() 0 10 2
A is_base64() 0 20 5
A hex_to_int() 0 14 3
A htmlentities() 0 28 3
A hex_to_chr() 0 3 1
A isJson() 0 3 1
A filter_input_array() 0 12 2
A getSupportInfo() 0 13 3
A replace_diamond_question_mark() 0 38 5
B str_delimit() 0 33 8
A min() 0 14 3
A collapse_whitespace() 0 8 2
C html_entity_decode() 0 78 17
A split() 0 6 1
B str_contains_any() 0 29 8
A remove_duplicates() 0 14 4
B file_get_contents() 0 56 11
A str_substr_after_first_separator() 0 28 6
B str_to_lines() 0 29 8
A substr_in_byte() 0 18 6
A strnatcasecmp() 0 5 1
A substr_left() 0 15 4
D strlen() 0 99 19
A html_entity_decode_helper() 0 18 3
A str_isubstr_last() 0 25 4
A str_replace_beginning() 0 24 6
B stripos() 0 59 11
A str_offset_exists() 0 10 2
D strrchr() 0 101 20
A to_filename() 0 9 1
C utf8_decode() 0 61 13
B str_longest_common_suffix() 0 54 10
C wordwrap() 0 68 14
B ucfirst() 0 57 7
A str_pad_both() 0 12 1
A str_index_last() 0 11 1
A str_substr_last() 0 33 6
A str_limit() 0 26 6
A toUTF8() 0 3 1
A string() 0 10 1
B rxClass() 0 39 8
B str_titleize_for_humans() 0 155 5
A str_starts_with() 0 11 3
C substr_count_in_byte() 0 55 15
A strchr() 0 13 1
A strichr() 0 13 1
A str_index_first() 0 11 1
A strlen_in_byte() 0 12 3
A str_ireplace_ending() 0 21 6
C str_longest_common_substring() 0 76 16
A titlecase() 0 31 5
A getData() 0 6 1
A str_iindex_first() 0 11 1
B strtolower() 0 54 10
B urldecode() 0 37 8
A str_isubstr_before_first_separator() 0 19 5
B strrev() 0 43 10
D substr_replace() 0 124 27
A strstr_in_byte() 0 15 4
A str_matches_pattern() 0 3 1
C str_titleize() 0 69 12
A ws() 0 3 1
A str_replace_first() 0 20 2
A toLatin1() 0 3 1
A str_pad_right() 0 12 1
B ucwords() 0 48 9
A to_boolean() 0 35 5
C stristr() 0 68 15
A strncasecmp() 0 10 1
B strwidth() 0 43 8
A trim() 0 19 4
A str_upper_camelize() 0 8 1
A substr_compare() 0 33 6
C substr_count() 0 62 16
A strnatcmp() 0 9 2
D str_pad() 0 146 16
A urldecode_unicode_helper() 0 8 2
A str_ireplace() 0 18 3
A to_latin1() 0 3 1
A str_replace_ending() 0 24 6
A string_has_bom() 0 10 3
B strtr() 0 34 8
A str_isubstr_after_last_separator() 0 26 5
B strspn() 0 30 10
A strcasecmp() 0 21 1
A str_transliterate() 0 6 1
B str_capitalize_name_helper() 0 79 10
A utf8_encode() 0 16 3
A str_istarts_with() 0 11 3
A str_replace() 0 14 1
A substr_iright() 0 15 4
A to_iso8859() 0 16 4
A words_limit() 0 20 5
A strip_tags() 0 18 4
A str_isubstr_before_last_separator() 0 24 6
D str_truncate_safe() 0 78 18
A substr_right() 0 31 6
D str_split() 0 125 28
A strrpos_in_byte() 0 12 4
F strrpos() 0 119 25
A str_replace_last() 0 19 2
A str_iindex_last() 0 11 1
A str_substr_before_last_separator() 0 31 6
B strtocasefold() 0 33 7
A tabs_to_spaces() 0 11 3
B str_truncate() 0 44 7
D strripos() 0 96 19
A strpos_in_byte() 0 12 4
A to_ascii() 0 6 1
A reduce_string_array() 0 29 6
B str_longest_common_prefix() 0 51 8
A str_pad_left() 0 12 1
A str_substr_first() 0 33 6
A str_repeat() 0 5 1
A strpbrk() 0 11 4
A whitespace_table() 0 3 1
A substr_count_simple() 0 31 6
A str_iends_with_any() 0 13 4
A str_substr_after_last_separator() 0 28 6
A str_isubstr_after_first_separator() 0 26 5
B str_snakeize() 0 55 6
A str_sort() 0 15 3
D to_utf8() 0 117 35
A ucword() 0 6 1
A str_underscored() 0 3 1
A str_offset_get() 0 14 4
A strip_whitespace() 0 7 2
A toAscii() 0 6 1
B str_limit_after_word() 0 55 11
A str_upper_first() 0 13 1
A swapCase() 0 17 4
A substr_ileft() 0 15 4
B urldecode_fix_win1252_chars() 0 227 1
A toIso8859() 0 3 1
A strtonatfold() 0 7 1
C strcspn() 0 52 12
A fixStrCaseHelper() 0 36 5
B str_split_pattern() 0 49 11
D strstr() 0 92 18
A str_isubstr_first() 0 25 4
A str_last_char() 0 16 4
A str_ireplace_beginning() 0 21 6
A str_substr_before_first_separator() 0 32 6
F substr() 0 143 32
A wordwrap_per_line() 0 28 5
A str_surround() 0 3 1
A strncmp() 0 19 4
A str_insert() 0 28 4
A utf8_fix_win1252_chars() 0 3 1
D is_utf8_string() 0 134 28
A to_utf8_convert_helper() 0 28 5
B strtoupper() 0 54 10
A str_starts_with_any() 0 17 5
B strrichr() 0 54 11
A str_istarts_with_any() 0 17 5
A initEmojiData() 0 26 4
B str_slice() 0 33 10
F strpos() 0 131 27
A str_shuffle() 0 35 6
A strcmp() 0 9 2
A str_word_count() 0 23 5
A strripos_in_byte() 0 12 4
A str_to_binary() 0 10 2
A symfony_polyfill_used() 0 16 5
B str_to_words() 0 36 8

How to fix   Complexity   

Complex Class

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
final class UTF8
8
{
9
    /**
10
     * (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
11
     * This regular expression is a work around for http://bugs.exim.org/1279
12
     */
13
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
14
15
    /**
     * Byte order mark (raw string) => length of that mark in bytes.
     *
     * Every real BOM byte sequence is followed by the string it turns into
     * when the BOM bytes were mis-read as "WINDOWS-1252" (one char has [maybe]
     * more than one byte in that form, hence the larger lengths).
     *
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
     *
     * @var array
     */
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        // "ï»¿" written as explicit bytes, so the key cannot be silently
        // stripped by editors / tools that swallow a literal BOM.
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252"
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        // NOTE(review): the two leading characters presumably stand in for the NUL bytes — confirm.
        '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        // NOTE(review): the two trailing characters presumably stand in for the NUL bytes — confirm.
        'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
34
35
    /**
36
     * Numeric code point => UTF-8 Character
37
     *
38
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
39
     *
40
     * @var array
41
     */
42
    private static $WHITESPACE = [
43
        // NUL Byte
44
        0 => "\x0",
45
        // Tab
46
        9 => "\x9",
47
        // New Line
48
        10 => "\xa",
49
        // Vertical Tab
50
        11 => "\xb",
51
        // Carriage Return
52
        13 => "\xd",
53
        // Ordinary Space
54
        32 => "\x20",
55
        // NO-BREAK SPACE
56
        160 => "\xc2\xa0",
57
        // OGHAM SPACE MARK
58
        5760 => "\xe1\x9a\x80",
59
        // MONGOLIAN VOWEL SEPARATOR
60
        6158 => "\xe1\xa0\x8e",
61
        // EN QUAD
62
        8192 => "\xe2\x80\x80",
63
        // EM QUAD
64
        8193 => "\xe2\x80\x81",
65
        // EN SPACE
66
        8194 => "\xe2\x80\x82",
67
        // EM SPACE
68
        8195 => "\xe2\x80\x83",
69
        // THREE-PER-EM SPACE
70
        8196 => "\xe2\x80\x84",
71
        // FOUR-PER-EM SPACE
72
        8197 => "\xe2\x80\x85",
73
        // SIX-PER-EM SPACE
74
        8198 => "\xe2\x80\x86",
75
        // FIGURE SPACE
76
        8199 => "\xe2\x80\x87",
77
        // PUNCTUATION SPACE
78
        8200 => "\xe2\x80\x88",
79
        // THIN SPACE
80
        8201 => "\xe2\x80\x89",
81
        //HAIR SPACE
82
        8202 => "\xe2\x80\x8a",
83
        // LINE SEPARATOR
84
        8232 => "\xe2\x80\xa8",
85
        // PARAGRAPH SEPARATOR
86
        8233 => "\xe2\x80\xa9",
87
        // NARROW NO-BREAK SPACE
88
        8239 => "\xe2\x80\xaf",
89
        // MEDIUM MATHEMATICAL SPACE
90
        8287 => "\xe2\x81\x9f",
91
        // IDEOGRAPHIC SPACE
92
        12288 => "\xe3\x80\x80",
93
    ];
94
95
    /**
96
     * @var array
97
     */
98
    private static $WHITESPACE_TABLE = [
99
        'SPACE'                     => "\x20",
100
        'NO-BREAK SPACE'            => "\xc2\xa0",
101
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
102
        'EN QUAD'                   => "\xe2\x80\x80",
103
        'EM QUAD'                   => "\xe2\x80\x81",
104
        'EN SPACE'                  => "\xe2\x80\x82",
105
        'EM SPACE'                  => "\xe2\x80\x83",
106
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
107
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
108
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
109
        'FIGURE SPACE'              => "\xe2\x80\x87",
110
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
111
        'THIN SPACE'                => "\xe2\x80\x89",
112
        'HAIR SPACE'                => "\xe2\x80\x8a",
113
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
114
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
115
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
116
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
117
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
118
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
119
        'HALFWIDTH HANGUL FILLER'   => "\xef\xbe\xa0",
120
    ];
121
122
    /**
123
     * @var array{upper: string[], lower: string[]}
0 ignored issues
show
Documentation Bug introduced by
The doc comment array{upper at position 0 could not be parsed: Unknown type name 'array{upper' at position 0 in array{upper.
Loading history...
124
     */
125
    private static $COMMON_CASE_FOLD = [
126
        'upper' => [
127
            'µ',
128
            'ſ',
129
            "\xCD\x85",
130
            'ς',
131
            'ẞ',
132
            "\xCF\x90",
133
            "\xCF\x91",
134
            "\xCF\x95",
135
            "\xCF\x96",
136
            "\xCF\xB0",
137
            "\xCF\xB1",
138
            "\xCF\xB5",
139
            "\xE1\xBA\x9B",
140
            "\xE1\xBE\xBE",
141
        ],
142
        'lower' => [
143
            'μ',
144
            's',
145
            'ι',
146
            'σ',
147
            'ß',
148
            'β',
149
            'θ',
150
            'φ',
151
            'π',
152
            'κ',
153
            'ρ',
154
            'ε',
155
            "\xE1\xB9\xA1",
156
            'ι',
157
        ],
158
    ];
159
160
    /**
161
     * @var array
162
     */
163
    private static $SUPPORT = [];
164
165
    /**
166
     * @var array|null
167
     */
168
    private static $BROKEN_UTF8_FIX;
169
170
    /**
171
     * @var array|null
172
     */
173
    private static $WIN1252_TO_UTF8;
174
175
    /**
176
     * @var array|null
177
     */
178
    private static $INTL_TRANSLITERATOR_LIST;
179
180
    /**
181
     * @var array|null
182
     */
183
    private static $ENCODINGS;
184
185
    /**
186
     * @var array|null
187
     */
188
    private static $ORD;
189
190
    /**
191
     * @var array|null
192
     */
193
    private static $EMOJI;
194
195
    /**
196
     * @var array|null
197
     */
198
    private static $EMOJI_VALUES_CACHE;
199
200
    /**
201
     * @var array|null
202
     */
203
    private static $EMOJI_KEYS_CACHE;
204
205
    /**
206
     * @var array|null
207
     */
208
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
209
210
    /**
211
     * @var array|null
212
     */
213
    private static $CHR;
214
215
    /**
216
     * __construct()
217
     */
218 33
    public function __construct()
219
    {
220 33
    }
221
222
    /**
     * Fetch one character by position, similar to $str[1] but multi-byte aware.
     *
     * An empty input string or a negative position yields an empty string.
     *
     * @param string $str      <p>A UTF-8 string.</p>
     * @param int    $pos      <p>The position of character to return.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string single multi-byte character
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        if ($pos < 0 || $str === '') {
            return '';
        }

        // "UTF-8" goes straight to mbstring, every other charset through self::substr().
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $pos, 1)
            : self::substr($str, $pos, 1, $encoding);

        return (string) $char;
    }
243
244
    /**
     * Makes sure the string starts with a BOM by prepending the UTF-8 BOM if needed.
     *
     * INFO: A string for which UTF8::string_has_bom() does not return false
     *       is handed back unchanged.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the output string that contains BOM
     */
    public static function add_bom_to_string(string $str): string
    {
        return self::string_has_bom($str) === false
            ? self::bom() . $str
            : $str;
    }
261
262
    /**
     * Changes the case of all keys in an array (multi-byte aware).
     *
     * The values are copied over untouched. If two keys become identical after
     * the case change, the later entry overwrites the earlier one.
     *
     * @param array  $array    <p>The array to work on</p>
     * @param int    $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                         or <strong>CASE_LOWER</strong> (default); any other value
     *                         is treated as <strong>CASE_LOWER</strong></p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return array
     *               <p>An array with its keys lower- or uppercased.</p>
     */
    public static function array_change_key_case(
        array $array,
        int $case = \CASE_LOWER,
        string $encoding = 'UTF-8'
    ): array {
        // Everything that is not CASE_UPPER falls back to CASE_LOWER.
        $to_upper = $case === \CASE_UPPER;

        $return = [];
        // Iterate by value: nothing is written back into $array, so a reference
        // would only leave a dangling "&$value" behind after the loop.
        foreach ($array as $key => $value) {
            $key = $to_upper
                ? self::strtoupper((string) $key, $encoding)
                : self::strtolower((string) $key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
297
298
    /**
     * Extracts the text located between two delimiters.
     *
     * An empty string is returned when $start is not found, when $end is not
     * found after $start, or when $end follows $start immediately. The search
     * for $start begins at $offset.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            // generic path: normalize the charset name and use the class helpers
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $from = self::strpos($str, $start, $offset, $encoding);
            if ($from === false) {
                return '';
            }

            // first character behind the start delimiter
            $from += (int) self::strlen($start, $encoding);
            $to = self::strpos($str, $end, $from, $encoding);
            if ($to === false || $to === $from) {
                return '';
            }

            return (string) self::substr($str, $from, $to - $from, $encoding);
        }

        // "UTF-8" path: call mbstring directly
        $from = \mb_strpos($str, $start, $offset);
        if ($from === false) {
            return '';
        }

        // first character behind the start delimiter
        $from += (int) \mb_strlen($start);
        $to = \mb_strpos($str, $end, $from);
        if ($to === false || $to === $from) {
            return '';
        }

        return (string) \mb_substr($str, $from, $to - $from);
    }
361
362
    /**
     * Convert a string of binary digits ("0" / "1") into the bytes it represents.
     *
     * The input is read as one big binary number: characters other than "0"
     * and "1" are ignored and leading zeros are dropped, so "0" (or any input
     * without a "1") results in an empty string. The remaining digits are
     * left-padded to whole bytes, e.g. "1010" becomes "\x0a".
     *
     * The conversion is done byte by byte, so it stays exact for inputs of any
     * length (no detour through float-based number conversion).
     *
     * @param mixed $bin 1|0 <p>Anything that is not a non-empty string results in an empty string.</p>
     *
     * @return string
     */
    public static function binary_to_str($bin): string
    {
        if (!\is_string($bin) || $bin === '') {
            return '';
        }

        // keep only the binary digits, then strip leading zeros (numeric semantics)
        $bits = \ltrim((string) \preg_replace('/[^01]+/', '', $bin), '0');
        if ($bits === '') {
            return '';
        }

        // left-pad to a multiple of 8, so every chunk is one complete byte
        $padding = (8 - (\strlen($bits) % 8)) % 8;
        $bits = \str_repeat('0', $padding) . $bits;

        $bytes = '';
        foreach (\str_split($bits, 8) as $octet) {
            $bytes .= \chr((int) \bindec($octet));
        }

        return $bytes;
    }
382
383
    /**
     * Provides the Byte Order Mark of UTF-8 (the three bytes EF BB BF).
     *
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
     *
     * @return string UTF-8 Byte Order Mark
     */
    public static function bom(): string
    {
        $utf8_bom = "\xef\xbb\xbf";

        return $utf8_bom;
    }
394
395
    /**
     * Runs a callback on every character of a string.
     *
     * @alias of UTF8::chr_map()
     *
     * @param callable $callback
     * @param string   $str
     *
     * @return string[]
     *
     * @see UTF8::chr_map()
     */
    public static function callback($callback, string $str): array
    {
        // pure alias: all of the work is done by chr_map()
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
409
410
    /**
     * Picks the single character located at $index (zero-based).
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @return string the character at $index
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // "UTF-8" goes straight to mbstring, every other charset through self::substr().
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $index, 1)
            : self::substr($str, $index, 1, $encoding);

        return (string) $char;
    }
427
428
    /**
     * Breaks the string up into its characters (delegates to UTF8::str_split()).
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string[] an array of chars
     */
    public static function chars(string $str): array
    {
        $characters = self::str_split($str);

        return $characters;
    }
439
440
    /**
     * Detects once which UTF-8 related extensions / features this server offers
     * and stores the result in the internal support table.
     *
     * @return true|null
     *                   <p>true when the detection ran in this call, null when it was already done before</p>
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        // the detection only ever runs once
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        // the polyfill provides "mb_internal_encoding()" as well, so set it there too
        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
495
496
    /**
     * Generates a UTF-8 encoded character from the given code point.
     *
     * INFO: opposite to UTF8::ord()
     *
     * Results are memoized per code point + encoding for the rest of the request.
     * Code points up to 127 are taken from the internal "chr" table, larger ones
     * are generated via "IntlChar" when available, otherwise they are assembled
     * byte by byte from the same table.
     *
     * @param int|string $code_point <p>The code point for which to generate a character.</p>
     * @param string     $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @return string|null multi-byte character, returns null on failure or empty input
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // init
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        $cache_key = $code_point . $encoding;
        if (isset($CHAR_CACHE[$cache_key]) === true) {
            return $CHAR_CACHE[$cache_key];
        }

        if ($code_point <= 127) { // use "simple"-char only until "\x80"

            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];
        } elseif (self::$SUPPORT['intlChar'] === true) {

            //
            // fallback via "IntlChar"
            //

            // "IntlChar::chr()" reads a string argument as an UTF-8 character, not
            // as a number, so a numeric string like "8482" must be cast first to be
            // handled like in the vanilla php fallback below.
            $intl_code_point = \is_string($code_point) && \is_numeric($code_point)
                ? (int) $code_point
                : $code_point;

            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($intl_code_point);
        } else {

            //
            // fallback via vanilla php
            //

            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            $code_point = (int) $code_point;
            if ($code_point <= 0x7F) {
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $chr = self::$CHR[$code_point];
            } elseif ($code_point <= 0x7FF) {
                // 2 bytes: 110xxxxx 10xxxxxx
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            } elseif ($code_point <= 0xFFFF) {
                // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                       self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            } else {
                // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                       self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                       self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            }
        }

        // the character was generated as UTF-8, convert it if another charset was requested
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cache_key] = $chr;
    }
608
609
    /**
     * Splits the string via UTF8::str_split() and passes every piece to the callback.
     *
     * @param callable $callback <p>The callback function.</p>
     * @param string   $str      <p>UTF-8 string to run callback on.</p>
     *
     * @return string[] the outcome of callback
     */
    public static function chr_map($callback, string $str): array
    {
        $chars = self::str_split($str);

        return \array_map($callback, $chars);
    }
624
625
    /**
     * Lists, character by character, how many bytes each one occupies.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @return int[] an array of byte lengths of each character
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        // pick the byte counter first, the string is split afterwards
        $byte_counter = self::$SUPPORT['mbstring_func_overload'] === true
            ? static function (string $data): int {
                // "mb_" is available if overload is used, so use it ...
                return \mb_strlen($data, 'CP850'); // 8-BIT
            }
            : '\strlen';

        return \array_map($byte_counter, self::str_split($str));
    }
655
656
    /**
     * Get a decimal code representation of a specific character.
     *
     * The first byte decides how many bytes belong to the character
     * (0xxxxxxx, 110xxxxx, 1110xxxx or 11110xxx), the payload bits of the
     * following 10xxxxxx bytes are then appended. Only the first character of
     * $char is evaluated.
     *
     * @param string $char <p>The input character.</p>
     *
     * @return int <p>The code point, 0 for an empty string.</p>
     */
    public static function chr_to_decimal(string $char): int
    {
        // Without this guard "$char[0]" raises an "Uninitialized string offset" error.
        // NOTE(review): 0 assumes UTF8::ord('') is 0, which is what the unguarded
        // code would have returned in that case — confirm against ord().
        if ($char === '') {
            return 0;
        }

        $code = self::ord($char[0]);
        $bytes = 1;

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        for ($i = 2; $i <= $bytes; ++$i) {
            // 10xxxxxx
            $code = ($code << 6) + (self::ord($char[$i - 1]) & ~0x80);
        }

        return $code;
    }
694
695
    /**
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * An empty string is returned unchanged. The literal entity "&#0;" is
     * replaced by an empty string before the code point is looked up.
     *
     * @param int|string $char   <p>The input character</p>
     * @param string     $prefix [optional] <p>Prefix passed on to "UTF8::int_to_hex()".</p>
     *
     * @return string The code point encoded as U+xxxx
     */
    public static function chr_to_hex($char, string $prefix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        $normalized = $char === '&#0;' ? '' : (string) $char;

        return self::int_to_hex(self::ord($normalized), $prefix);
    }
715
716
    /**
     * Alias of "UTF8::chr_to_decimal()", kept for backward compatibility.
     *
     * @param string $chr <p>The input character.</p>
     *
     * @return int
     *
     * @see UTF8::chr_to_decimal()
     * @deprecated <p>please use "UTF8::chr_to_decimal()"</p>
     */
    public static function chr_to_int(string $chr): int
    {
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
730
731
    /**
     * Splits a string into smaller chunks and joins them with the given line ending.
     *
     * The chunks come from "UTF8::str_split()" and are glued together with
     * implode(), so $end is placed between chunks only (not after the last one).
     *
     * @param string $body         <p>The original string to be split.</p>
     * @param int    $chunk_length [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end          [optional] <p>The character(s) to be inserted between the chunks.</p>
     *
     * @return string the chunked string
     */
    public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunk_length);

        return \implode($end, $chunks);
    }
744
745
    /**
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
     *
     * Processing order: strip invalid bytes, then (each only if requested)
     * remove the diamond question mark, remove invisible characters,
     * normalize whitespace, normalize MS Word characters, remove the BOM.
     *
     * @param string $str                           <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
     * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
     *                                              whitespace.</p>
     * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
     *                                              e.g.: "…"
     *                                              => "..."</p>
     * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
     *                                              combination with
     *                                              $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
     *                                              mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove
     *                                              invisible characters e.g.: "\0"</p>
     *
     * @return string clean UTF-8 encoded string
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings
        //
        // Group 1 captures a run of well-formed byte sequences; groups 2 and 3
        // match a single stray byte. Replacing every match with "$1" keeps the
        // well-formed runs and drops the stray bytes.
        $byte_sequence_pattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        /** @noinspection NotOptimalRegularExpressionsInspection */
        $cleaned = (string) \preg_replace($byte_sequence_pattern, '$1', $str);

        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned, '');
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        if ($remove_bom) {
            $cleaned = self::remove_bom($cleaned);
        }

        return $cleaned;
    }
813
814
    /**
     * Clean-up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
     *
     * The input is cast to string, passed through "UTF8::fix_simple_utf8()"
     * and then through "UTF8::clean()" with a fixed set of options.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function cleanup($str): string
    {
        $input = (string) $str;

        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $fixed = self::fix_simple_utf8($input);

        return self::clean(
            $fixed,
            true,  // remove BOM
            true,  // normalize whitespace chars ...
            false, // ... but do not normalize MS Word chars
            true,  // keep non-breaking-spaces
            true,  // remove diamond question mark (�)
            true   // remove invisible characters (e.g. "\0")
        );
    }
848
849
    /**
     * Accepts a string or an array of strings and returns an array of Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * A string argument is first split via "UTF8::str_split()"; an array
     * argument is used as given. Each element is then mapped through
     * "UTF8::ord()" and, for $u_style, through "UTF8::int_to_hex()".
     *
     * @param string|string[] $arg     <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $u_style <p>If True, will return code points in U+xxxx format,
     *                                 default, code points will be returned as integers.</p>
     *
     * @return array<int|string>
     *                           The array of code points:<br>
     *                           array<int> for $u_style === false<br>
     *                           array<string> for $u_style === true<br>
     */
    public static function codepoints($arg, bool $u_style = false): array
    {
        $chars = \is_string($arg) === true ? self::str_split($arg) : $arg;

        $code_points = \array_map([self::class, 'ord'], $chars);

        if (\count($code_points) === 0) {
            return [];
        }

        if ($u_style !== true) {
            return $code_points;
        }

        return \array_map([self::class, 'int_to_hex'], $code_points);
    }
893
894
    /**
     * Trims the string and replaces consecutive whitespace characters with a
     * single space. This includes tabs and newline characters, as well as
     * multibyte whitespace such as the thin space and ideographic space.
     *
     * Uses mb_ereg_replace() when mbstring is flagged as available and
     * "UTF8::regex_replace()" otherwise; both use the "[[:space:]]+" pattern.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string with a trimmed $str and condensed whitespace
     */
    public static function collapse_whitespace(string $str): string
    {
        if (self::$SUPPORT['mbstring'] !== true) {
            return \trim(self::regex_replace($str, '[[:space:]]+', ' '));
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $collapsed = (string) \mb_ereg_replace('[[:space:]]+', ' ', $str);

        return \trim($collapsed);
    }
912
913
    /**
     * Returns count of characters used in a string.
     *
     * The string is split into single characters via "UTF8::str_split()"
     * and the occurrences are tallied with array_count_values().
     *
     * @param string $str                     <p>The input string.</p>
     * @param bool   $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool   $try_to_use_mb_functions [optional] <p>Passed through to "UTF8::str_split()";
     *                                        set to false, if you don't want to use "mb_" functions there.</p>
     *
     * @return int[] an associative array of Character as keys and
     *               their count as values
     */
    public static function count_chars(
        string $str,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        $single_chars = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

        return \array_count_values($single_chars);
    }
937
938
    /**
     * Remove css media-queries.
     *
     * Every "@media ... { ... }" block matched by the pattern below is
     * replaced with an empty string; the rest of the input is kept as is.
     *
     * @param string $str <p>The CSS input.</p>
     *
     * @return string the input without the matched media-query blocks
     */
    public static function css_stripe_media_queries(string $str): string
    {
        $media_query_pattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $without_media_queries = \preg_replace($media_query_pattern, '', $str);

        return (string) $without_media_queries;
    }
953
954
    /**
     * Checks whether ctype is available on the server.
     *
     * @return bool
     *              <strong>true</strong> if the "ctype" extension is loaded, <strong>false</strong> otherwise
     */
    public static function ctype_loaded(): bool
    {
        $is_loaded = \extension_loaded('ctype');

        return $is_loaded;
    }
964
965
    /**
     * Converts an int value into a UTF-8 character.
     *
     * The value is wrapped into a numeric HTML entity ("&#<int>;") which is
     * then decoded via "UTF8::html_entity_decode()".
     *
     * @param mixed $int
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        $numeric_entity = '&#' . $int . ';';

        return self::html_entity_decode($numeric_entity, \ENT_QUOTES | \ENT_HTML5);
    }
976
977
    /**
     * Decodes a MIME header field
     *
     * With iconv flagged as available the work is done by iconv_mime_decode()
     * (continuing on errors); otherwise the string is passed to
     * mb_decode_mimeheader(), after "UTF8::encode()" for non UTF-8 charsets.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      A decoded MIME field on success,
     *                      or false if an error occurs during the decoding
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        $is_plain_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_plain_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (self::$SUPPORT['iconv'] === true) {
            return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
        }

        $header = $encoding === 'UTF-8' ? $str : self::encode($encoding, $str);

        return \mb_decode_mimeheader($header);
    }
1003
1004
    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * @param string $str                            <p>The input string.</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               When <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode".</p>
     *
     * @return string
     */
    public static function emoji_decode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        self::initEmojiData();

        // pick the key table that matches the mapping used while encoding
        $encoded_keys = $use_reversible_string_mappings === true
            ? (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : (array) self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            $encoded_keys,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }
1034
1035
    /**
     * Encode a string with emoji chars into a non-emoji string.
     *
     * @param string $str                            <p>The input string</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               when <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode"</p>
     *
     * @return string
     */
    public static function emoji_encode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        self::initEmojiData();

        // pick the key table the emoji values are replaced with
        $replacement_keys = $use_reversible_string_mappings === true
            ? (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : (array) self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            $replacement_keys,
            $str
        );
    }
1065
1066
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  This function will also try to fix broken / double encoding,
     *        so you can call this function also on a UTF-8 string and you don't mess up the string.
     *
     * Besides real charsets, the pseudo encodings "JSON", "BASE64" and
     * "HTML-ENTITIES" are handled for both directions. Input that cannot be
     * decoded as strict Base64 results in an empty string.
     *
     * @param string $to_encoding                   <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                           <p>The input string</p>
     * @param bool   $auto_detect_the_from_encoding [optional] <p>When true (default) the current encoding of
     *                                              the string is auto-detected and, if detected, used instead
     *                                              of $from_encoding.</p>
     * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                              A empty string will trigger the autodetect anyway.</p>
     *
     * @throws \InvalidArgumentException if the string can not be encoded as JSON
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $to_encoding,
        string $str,
        bool $auto_detect_the_from_encoding = true,
        string $from_encoding = ''
    ): string {
        if ($str === '' || $to_encoding === '') {
            return $str;
        }

        if ($to_encoding !== 'UTF-8' && $to_encoding !== 'CP850') {
            $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
        }

        if ($from_encoding && $from_encoding !== 'UTF-8' && $from_encoding !== 'CP850') {
            $from_encoding = self::normalize_encoding($from_encoding, null);
        }

        // nothing to do when source and target are explicitly the same
        if (
            $to_encoding
            &&
            $from_encoding
            &&
            $from_encoding === $to_encoding
        ) {
            return $str;
        }

        if ($to_encoding === 'JSON') {
            $return = self::json_encode($str);
            if ($return === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $return;
        }
        if ($from_encoding === 'JSON') {
            $str = self::json_decode($str);
            $from_encoding = '';
        }

        if ($to_encoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($from_encoding === 'BASE64') {
            $decoded = \base64_decode($str, true);
            if ($decoded === false) {
                // Strict decoding failed: the input is not valid Base64.
                // Return an empty string instead of letting a boolean flow
                // into the string-only code (and return type) below.
                return '';
            }

            $str = $decoded;
            $from_encoding = '';
        }

        if ($to_encoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true, 'UTF-8');
        }
        if ($from_encoding === 'HTML-ENTITIES') {
            $str = self::html_entity_decode($str, \ENT_COMPAT, 'UTF-8');
            $from_encoding = '';
        }

        $from_encoding_auto_detected = false;
        if (
            $auto_detect_the_from_encoding === true
            ||
            !$from_encoding
        ) {
            $from_encoding_auto_detected = self::str_detect_encoding($str);
        }

        // DEBUG
        //var_dump($to_encoding, $from_encoding, $from_encoding_auto_detected, $str, "\n\n");

        if ($from_encoding_auto_detected !== false) {
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $from_encoding = $from_encoding_auto_detected;
        } elseif ($auto_detect_the_from_encoding === true) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (
            !$from_encoding
            ||
            $from_encoding === $to_encoding
        ) {
            return $str;
        }

        if (
            $to_encoding === 'UTF-8'
            &&
            (
                $from_encoding === 'WINDOWS-1252'
                ||
                $from_encoding === 'ISO-8859-1'
            )
        ) {
            return self::to_utf8($str);
        }

        if (
            $to_encoding === 'ISO-8859-1'
            &&
            (
                $from_encoding === 'WINDOWS-1252'
                ||
                $from_encoding === 'UTF-8'
            )
        ) {
            return self::to_iso8859($str);
        }

        if (
            $to_encoding !== 'UTF-8'
            &&
            $to_encoding !== 'ISO-8859-1'
            &&
            $to_encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // warning: do not use the symfony polyfill here
            $str_encoded = \mb_convert_encoding(
                $str,
                $to_encoding,
                $from_encoding
            );

            if ($str_encoded) {
                return $str_encoded;
            }
        }

        // Last resort: iconv. Only call it when the function exists
        // (extension or polyfill), otherwise the call itself would be fatal.
        if (\function_exists('iconv') === true) {
            $return = \iconv($from_encoding, $to_encoding, $str);
            if ($return !== false) {
                return $return;
            }
        }

        return $str;
    }
1225
1226
    /**
     * Encodes a string as a MIME header field via iconv_mime_encode().
     *
     * The field name passed to iconv_mime_encode() is always an empty string.
     *
     * @param string $str
     * @param string $from_charset      [optional] <p>Set the input charset.</p>
     * @param string $to_charset        [optional] <p>Set the output charset.</p>
     * @param string $transfer_encoding [optional] <p>Set the transfer encoding.</p>
     * @param string $linefeed          [optional] <p>Set the used linefeed. The default (the escaped
     *                                  text <code>\r\n</code>) is translated into a real CR+LF.</p>
     * @param int    $indent            [optional] <p>Set the max line length ("line-length" preference).</p>
     *
     * @return false|string
     *                      <p>An encoded MIME field on success,
     *                      or false if an error occurs during the encoding.</p>
     */
    public static function encode_mimeheader(
        $str,
        $from_charset = 'UTF-8',
        $to_charset = 'UTF-8',
        $transfer_encoding = 'Q',
        $linefeed = '\\r\\n',
        $indent = 76
    ) {
        if ($from_charset !== 'UTF-8' && $from_charset !== 'CP850') {
            $from_charset = self::normalize_encoding($from_charset, 'UTF-8');
        }

        if ($to_charset !== 'UTF-8' && $to_charset !== 'CP850') {
            $to_charset = self::normalize_encoding($to_charset, 'UTF-8');
        }

        // The declared default is the four literal characters backslash, "r",
        // backslash, "n" (single-quoted string) and not a line break. Map it
        // to a real CR+LF so folded headers do not contain that literal text.
        if ($linefeed === '\\r\\n') {
            $linefeed = "\r\n";
        }

        return \iconv_mime_encode(
            '',
            $str,
            [
                'scheme'           => $transfer_encoding,
                'line-length'      => $indent,
                'input-charset'    => $from_charset,
                'output-charset'   => $to_charset,
                'line-break-chars' => $linefeed,
            ]
        );
    }
1266
1267
    /**
     * Create an extract from a sentence; if the search-string is found, the extract tries to center on it.
     *
     * Cut positions are looked up as the next / previous " " or "." around
     * the computed offsets, and skipped text is marked with
     * $replacer_for_skipped_text.
     *
     * NOTE(review): the cut position is "(int) min(pos_of_space, pos_of_dot)".
     * When one of the two lookups reports false (not found) the result appears
     * to collapse to 0, which then selects the "no cut position" branches.
     * This is kept exactly as is -- confirm before changing it.
     *
     * @param string   $str                       <p>The input string.</p>
     * @param string   $search                    <p>The searched string.</p>
     * @param int|null $length                    [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
     * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacer_for_skipped_text = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $is_utf8 = $encoding === 'UTF-8';
        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        // Local shortcuts: plain "mb_" calls for UTF-8, the class helpers
        // (with explicit encoding) for everything else.
        $char_count = static function (string $text) use ($is_utf8, $encoding): int {
            return $is_utf8
                ? (int) \mb_strlen($text)
                : (int) self::strlen($text, $encoding);
        };
        $find_from = static function (string $needle, int $offset) use ($str, $is_utf8, $encoding) {
            return $is_utf8
                ? \mb_strpos($str, $needle, $offset)
                : self::strpos($str, $needle, $offset, $encoding);
        };
        $slice = static function (int $start, int $size) use ($str, $is_utf8, $encoding) {
            return $is_utf8
                ? \mb_substr($str, $start, $size)
                : self::substr($str, $start, $size, $encoding);
        };

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2, 0);
        }

        // ------------------------------------------------------------------
        // no search term: cut the head of the text at the next " " or "."
        // ------------------------------------------------------------------
        if ($search === '') {
            $end = 0;
            if ($length > 0) {
                $string_length = $char_count($str);
                $end = ($length - 1) > $string_length ? $string_length : ($length - 1);
            }

            $pos = (int) \min(
                $find_from(' ', $end),
                $find_from('.', $end)
            );

            if (!$pos) {
                return $str;
            }

            $str_sub = $slice(0, $pos);
            if ($str_sub === false) {
                return '';
            }

            return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
        }

        // ------------------------------------------------------------------
        // search term given: locate it and compute the start of the window
        // ------------------------------------------------------------------
        $word_position = $is_utf8
            ? (int) \mb_stripos($str, $search)
            : (int) self::stripos($str, $search, 0, $encoding);
        $half_side = (int) ($word_position - $length / 2 + $char_count($search) / 2);

        $pos_start = 0;
        if ($half_side > 0) {
            $half_text = $slice(0, $half_side);
            if ($half_text !== false) {
                if ($is_utf8) {
                    $last_space = \mb_strrpos($half_text, ' ');
                    $last_dot = \mb_strrpos($half_text, '.');
                } else {
                    $last_space = self::strrpos($half_text, ' ', 0, $encoding);
                    $last_dot = self::strrpos($half_text, '.', 0, $encoding);
                }

                $pos_start = (int) \max($last_space, $last_dot);
            }
        }

        $total_length = (int) self::strlen($str, $encoding);

        if ($word_position && $half_side > 0) {
            // window starts somewhere inside the text
            $offset = $pos_start + $length - 1;
            if ($offset > $total_length) {
                $offset = $total_length;
            }

            $pos_end = (int) \min(
                $find_from(' ', $offset),
                $find_from('.', $offset)
            ) - $pos_start;

            if ($pos_end <= 0) {
                // no usable end position: keep everything from the window start
                $str_sub = $slice($pos_start, $char_count($str));

                return $str_sub !== false
                    ? $replacer_for_skipped_text . \ltrim($str_sub, $trim_chars)
                    : '';
            }

            $str_sub = $slice($pos_start, $pos_end);

            return $str_sub !== false
                ? $replacer_for_skipped_text . \trim($str_sub, $trim_chars) . $replacer_for_skipped_text
                : '';
        }

        // window starts at the beginning of the text
        $offset = $length - 1;
        if ($offset > $total_length) {
            $offset = $total_length;
        }

        $pos_end = (int) \min(
            $find_from(' ', $offset),
            $find_from('.', $offset)
        );

        if (!$pos_end) {
            return $str;
        }

        $str_sub = $slice(0, $pos_end);

        return $str_sub !== false
            ? \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text
            : '';
    }
1454
1455
    /**
1456
     * Reads entire file into a string.
1457
     *
1458
     * WARNING: Do not use UTF-8 Option ($convert_to_utf8) for binary files (e.g.: images) !!!
1459
     *
1460
     * @see http://php.net/manual/en/function.file-get-contents.php
1461
     *
1462
     * @param string        $filename         <p>
1463
     *                                        Name of the file to read.
1464
     *                                        </p>
1465
     * @param bool          $use_include_path [optional] <p>
1466
     *                                        Prior to PHP 5, this parameter is called
1467
     *                                        use_include_path and is a bool.
1468
     *                                        As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1469
     *                                        to trigger include path
1470
     *                                        search.
1471
     *                                        </p>
1472
     * @param resource|null $context          [optional] <p>
1473
     *                                        A valid context resource created with
1474
     *                                        stream_context_create. If you don't need to use a
1475
     *                                        custom context, you can skip this parameter by &null;.
1476
     *                                        </p>
1477
     * @param int|null      $offset           [optional] <p>
1478
     *                                        The offset where the reading starts.
1479
     *                                        </p>
1480
     * @param int|null      $max_length       [optional] <p>
1481
     *                                        Maximum length of data read. The default is to read until end
1482
     *                                        of file is reached.
1483
     *                                        </p>
1484
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
1485
     * @param bool          $convert_to_utf8  <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
1486
     *                                        some files, because they used non default utf-8 chars. Binary files
1487
     *                                        like images or pdf will not be converted.</p>
1488
     * @param string        $from_encoding    [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
1489
     *                                        An empty string will trigger the autodetect anyway.</p>
1490
     *
1491
     * @return false|string
1492
     *                      <p>The function returns the read data as string or <b>false</b> on failure.</p>
1493
     */
1494 12
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $max_length = null,
        int $timeout = 10,
        bool $convert_to_utf8 = true,
        string $from_encoding = ''
    ) {
        // Run the path through the string sanitizer first; a rejected path counts as a failure.
        // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 - confirm the replacement strategy.
        $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);
        /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
        if ($filename === false) {
            return false;
        }

        // Without a caller-supplied context, create one that carries the HTTP timeout.
        if ($context === null && $timeout) {
            $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
        }

        // The length argument is only handed to PHP when it really is an integer,
        // a missing offset means "start at the beginning".
        $read_args = [$filename, $use_include_path, $context, $offset ?? 0];
        if (\is_int($max_length)) {
            $read_args[] = $max_length;
        }
        $data = \file_get_contents(...$read_args);

        // Read error: hand the failure straight back to the caller.
        if ($data === false) {
            return false;
        }

        // Raw mode: the caller asked for the bytes exactly as they were read.
        if (!$convert_to_utf8) {
            return $data;
        }

        // Convert when the data is not flagged as binary, or when it is detected as UTF-16 / UTF-32.
        $is_not_binary = self::is_binary($data, true) !== true;
        if (
            $is_not_binary
            ||
            self::is_utf16($data, false) !== false
            ||
            self::is_utf32($data, false) !== false
        ) {
            $data = self::cleanup(
                self::encode('UTF-8', $data, false, $from_encoding)
            );
        }

        return $data;
    }
1551
1552
    /**
1553
     * Checks if a file starts with BOM (Byte Order Mark) character.
1554
     *
1555
     * @param string $file_path <p>Path to a valid file.</p>
1556
     *
1557
     * @throws \RuntimeException if file_get_contents() returned false
1558
     *
1559
     * @return bool
1560
     *              <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p>
1561
     */
1562 2
    public static function file_has_bom(string $file_path): bool
    {
        // Load the file and let the string-level BOM check decide.
        $raw = \file_get_contents($file_path);
        if ($raw !== false) {
            return self::string_has_bom($raw);
        }

        // The file could not be read at all.
        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1571
1572
    /**
1573
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1574
     *
1575
     * @param mixed  $var
1576
     * @param int    $normalization_form
1577
     * @param string $leading_combining
1578
     *
1579
     * @return mixed
1580
     */
1581 62
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        // Arrays and objects: filter every element / iterable property in place, recursively.
        if (\is_array($var) || \is_object($var)) {
            /** @noinspection ForeachSourceInspection */
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            unset($item);

            return $var;
        }

        // Everything that is neither a container nor a string is returned untouched.
        if (!\is_string($var)) {
            return $var;
        }

        if (\strpos($var, "\r") !== false) {
            // Workaround https://bugs.php.net/65732
            $var = self::normalize_line_ending($var);
        }

        // Only non-ASCII strings need normalization.
        if (ASCII::is_ascii($var) !== false) {
            return $var;
        }

        if (\Normalizer::isNormalized($var, $normalization_form)) {
            // Placeholder so that the isset() check below sees a non-empty value.
            $normalized = '-';
        } else {
            $normalized = \Normalizer::normalize($var, $normalization_form);

            // An unusable normalizer result triggers a re-encoding to UTF-8 instead.
            $var = isset($normalized[0])
                ? $normalized
                : self::encode('UTF-8', $var, true);
        }

        // Prevent leading combining chars
        // for NFC-safe concatenations.
        $starts_with_combining_mark = $var[0] >= "\x80"
            && isset($normalized[0], $leading_combining[0])
            && \preg_match('/^\\p{Mn}/u', $var);
        if ($starts_with_combining_mark) {
            $var = $leading_combining . $var;
        }

        return $var;
    }
1641
1642
    /**
1643
     * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1644
     *
1645
     * Gets a specific external variable by name and optionally filters it
1646
     *
1647
     * @see http://php.net/manual/en/function.filter-input.php
1648
     *
1649
     * @param int    $type          <p>
1650
     *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1651
     *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1652
     *                              <b>INPUT_ENV</b>.
1653
     *                              </p>
1654
     * @param string $variable_name <p>
1655
     *                              Name of a variable to get.
1656
     *                              </p>
1657
     * @param int    $filter        [optional] <p>
1658
     *                              The ID of the filter to apply. The
1659
     *                              manual page lists the available filters.
1660
     *                              </p>
1661
     * @param mixed  $options       [optional] <p>
1662
     *                              Associative array of options or bitwise disjunction of flags. If filter
1663
     *                              accepts options, flags can be provided in "flags" field of array.
1664
     *                              </p>
1665
     *
1666
     * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
1667
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1668
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1669
     */
1670
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // Forward $options only when the caller actually supplied a fourth argument.
        $value = \func_num_args() < 4
            ? \filter_input($type, $variable_name, $filter)
            : \filter_input($type, $variable_name, $filter, $options);

        // Normalize whatever the native filter produced.
        return self::filter($value);
    }
1684
1685
    /**
1686
     * "filter_input_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1687
     *
1688
     * Gets external variables and optionally filters them
1689
     *
1690
     * @see http://php.net/manual/en/function.filter-input-array.php
1691
     *
1692
     * @param int   $type       <p>
1693
     *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1694
     *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1695
     *                          <b>INPUT_ENV</b>.
1696
     *                          </p>
1697
     * @param mixed $definition [optional] <p>
1698
     *                          An array defining the arguments. A valid key is a string
1699
     *                          containing a variable name and a valid value is either a filter type, or an array
1700
     *                          optionally specifying the filter, flags and options. If the value is an
1701
     *                          array, valid keys are filter which specifies the
1702
     *                          filter type,
1703
     *                          flags which specifies any flags that apply to the
1704
     *                          filter, and options which specifies any options that
1705
     *                          apply to the filter. See the example below for a better understanding.
1706
     *                          </p>
1707
     *                          <p>
1708
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1709
     *                          input array are filtered by this filter.
1710
     *                          </p>
1711
     * @param bool  $add_empty  [optional] <p>
1712
     *                          Add missing keys as <b>NULL</b> to the return value.
1713
     *                          </p>
1714
     *
1715
     * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1716
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1717
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
1718
     *               is not set and <b>NULL</b> if the filter fails.
1719
     */
1720
    public static function filter_input_array(
        int $type,
        $definition = null,
        bool $add_empty = true
    ) {
        // With only the input type given, let PHP apply its own defaults.
        $values = \func_num_args() < 2
            ? \filter_input_array($type)
            : \filter_input_array($type, $definition, $add_empty);

        // Normalize the result (recursively, if it is an array).
        return self::filter($values);
    }
1733
1734
    /**
1735
     * "filter_var()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1736
     *
1737
     * Filters a variable with a specified filter
1738
     *
1739
     * @see http://php.net/manual/en/function.filter-var.php
1740
     *
1741
     * @param mixed $variable <p>
1742
     *                        Value to filter.
1743
     *                        </p>
1744
     * @param int   $filter   [optional] <p>
1745
     *                        The ID of the filter to apply. The
1746
     *                        manual page lists the available filters.
1747
     *                        </p>
1748
     * @param mixed $options  [optional] <p>
1749
     *                        Associative array of options or bitwise disjunction of flags. If filter
1750
     *                        accepts options, flags can be provided in "flags" field of array. For
1751
     *                        the "callback" filter, callable type should be passed. The
1752
     *                        callback must accept one argument, the value to be filtered, and return
1753
     *                        the value after filtering/sanitizing it.
1754
     *                        </p>
1755
     *                        <p>
1756
     *                        <code>
1757
     *                        // for filters that accept options, use this format
1758
     *                        $options = array(
1759
     *                        'options' => array(
1760
     *                        'default' => 3, // value to return if the filter fails
1761
     *                        // other options here
1762
     *                        'min_range' => 0
1763
     *                        ),
1764
     *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1765
     *                        );
1766
     *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1767
     *                        // for filter that only accept flags, you can pass them directly
1768
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1769
     *                        // for filter that only accept flags, you can also pass as an array
1770
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1771
     *                        array('flags' => FILTER_NULL_ON_FAILURE));
1772
     *                        // callback validate filter
1773
     *                        function foo($value)
1774
     *                        {
1775
     *                        // Expected format: Surname, GivenNames
1776
     *                        if (strpos($value, ", ") === false) return false;
1777
     *                        list($surname, $givennames) = explode(", ", $value, 2);
1778
     *                        $empty = (empty($surname) || empty($givennames));
1779
     *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1780
     *                        if ($empty || $notstrings) {
1781
     *                        return false;
1782
     *                        } else {
1783
     *                        return $value;
1784
     *                        }
1785
     *                        }
1786
     *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1787
     *                        </code>
1788
     *                        </p>
1789
     *
1790
     * @return mixed the filtered data, or <b>FALSE</b> if the filter fails
1791
     */
1792 2
    public static function filter_var(
        $variable,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // Forward $options only when the caller actually supplied a third argument.
        $filtered = \func_num_args() < 3
            ? \filter_var($variable, $filter)
            : \filter_var($variable, $filter, $options);

        // Normalize whatever the native filter produced.
        return self::filter($filtered);
    }
1805
1806
    /**
1807
     * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1808
     *
1809
     * Gets multiple variables and optionally filters them
1810
     *
1811
     * @see http://php.net/manual/en/function.filter-var-array.php
1812
     *
1813
     * @param array $data       <p>
1814
     *                          An array with string keys containing the data to filter.
1815
     *                          </p>
1816
     * @param mixed $definition [optional] <p>
1817
     *                          An array defining the arguments. A valid key is a string
1818
     *                          containing a variable name and a valid value is either a
1819
     *                          filter type, or an
1820
     *                          array optionally specifying the filter, flags and options.
1821
     *                          If the value is an array, valid keys are filter
1822
     *                          which specifies the filter type,
1823
     *                          flags which specifies any flags that apply to the
1824
     *                          filter, and options which specifies any options that
1825
     *                          apply to the filter. See the example below for a better understanding.
1826
     *                          </p>
1827
     *                          <p>
1828
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1829
     *                          input array are filtered by this filter.
1830
     *                          </p>
1831
     * @param bool  $add_empty  [optional] <p>
1832
     *                          Add missing keys as <b>NULL</b> to the return value.
1833
     *                          </p>
1834
     *
1835
     * @return mixed an array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1836
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1837
     *               set
1838
     */
1839 2
    public static function filter_var_array(
        array $data,
        $definition = null,
        bool $add_empty = true
    ) {
        // With only the data given, let PHP apply its own defaults.
        $values = \func_num_args() < 2
            ? \filter_var_array($data)
            : \filter_var_array($data, $definition, $add_empty);

        // Normalize the result (recursively, if it is an array).
        return self::filter($values);
    }
1852
1853
    /**
1854
     * Checks whether finfo is available on the server.
1855
     *
1856
     * @return bool
1857
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
1858
     */
1859
    public static function finfo_loaded(): bool
    {
        // The check is simply whether the "finfo" class can be resolved.
        $finfo_available = \class_exists('finfo');

        return $finfo_available;
    }
1863
1864
    /**
1865
     * Returns the first $n characters of the string.
1866
     *
1867
     * @param string $str      <p>The input string.</p>
1868
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
1869
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1870
     *
1871
     * @return string
1872
     */
1873 13
    public static function first_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to take from an empty string, or when no characters are requested.
        if ($n <= 0 || $str === '') {
            return '';
        }

        // UTF-8 goes straight to mbstring; any other charset uses the class' own substr().
        $head = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $head;
    }
1888
1889
    /**
1890
     * Check if the number of Unicode characters isn't greater than the specified integer.
1891
     *
1892
     * @param string $str      the original string to be checked
1893
     * @param int    $box_size the size in number of chars to be checked against string
1894
     *
1895
     * @return bool true if string is less than or equal to $box_size, false otherwise
1896
     */
1897 2
    public static function fits_inside(string $str, int $box_size): bool
    {
        // Length as reported by the class' own strlen(), forced to an integer.
        $char_count = (int) self::strlen($str);

        return $box_size >= $char_count;
    }
1901
1902
    /**
1903
     * Try to fix simple broken UTF-8 strings.
1904
     *
1905
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1906
     *
1907
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1908
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1909
     * See: http://en.wikipedia.org/wiki/Windows-1252
1910
     *
1911
     * @param string $str <p>The input string</p>
1912
     *
1913
     * @return string
1914
     */
1915 46
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // Search / replace lists are derived once from the fix-table and kept for later calls.
        static $search = null;
        static $replace = null;

        if ($search === null) {
            // Load the fix-table on first use.
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $search = \array_keys(self::$BROKEN_UTF8_FIX);
            $replace = \array_values(self::$BROKEN_UTF8_FIX);
        }

        return \str_replace($search, $replace, $str);
    }
1935
1936
    /**
1937
     * Fix a double (or multiple) encoded UTF8 string.
1938
     *
1939
     * @param string|string[] $str you can use a string or an array of strings
1940
     *
1941
     * @return string|string[]
1942
     *                         Will return the fixed input-"array" or
1943
     *                         the fixed input-"string"
1944
     *
1945
     * @psalm-suppress InvalidReturnType
1946
     */
1947 2
    public static function fix_utf8($str)
    {
        // Arrays are handled element by element (recursively), keys stay as they are.
        if (\is_array($str) === true) {
            foreach ($str as $key => $item) {
                $str[$key] = self::fix_utf8($item);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        // Decode and re-encode repeatedly until the string stops changing.
        $current = (string) $str;
        $previous = '';
        while ($previous !== $current) {
            $previous = $current;
            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $current = self::to_utf8(
                self::utf8_decode($current, true)
            );
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }
1978
1979
    /**
1980
     * Get the text direction ('RTL' or 'LTR') of a specific character.
1981
     *
1982
     * @param string $char
1983
     *
1984
     * @return string 'RTL' or 'LTR'
1985
     */
1986 2
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $direction_class = \IntlChar::charDirection($char);

            // Direction values from the "IntlChar"-Class; any other value falls through to the table lookup.
            if (\in_array($direction_class, [0, 11, 12, 20], true)) {
                return 'LTR';
            }

            if (\in_array($direction_class, [1, 13, 14, 15, 21], true)) {
                return 'RTL';
            }
        }

        $code_point = static::chr_to_decimal($char);

        // Everything outside of the span covered by the tables below is left-to-right.
        if ($code_point < 0x5be || $code_point > 0x10b7f) {
            return 'LTR';
        }

        // Individual right-to-left code points.
        $rtl_code_points = [
            0x5be, 0x5c0, 0x5c3, 0x5c6,
            0x608, 0x60b, 0x60d, 0x61b,
            0x710, 0x7b1, 0x7fa,
            0x81a, 0x824, 0x828, 0x85e,
            0x200f,
            0xfb1d, 0xfb3e,
            0x10808, 0x1083c, 0x1093f, 0x10a00,
        ];
        if (\in_array($code_point, $rtl_code_points, true)) {
            return 'RTL';
        }

        // Inclusive [first, last] right-to-left ranges.
        $rtl_ranges = [
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            [0x800, 0x815],
            [0x830, 0x83e],
            [0x840, 0x858],
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            [0x10b78, 0x10b7f],
        ];
        foreach ($rtl_ranges as $range) {
            if ($code_point >= $range[0] && $code_point <= $range[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2092
2093
    /**
2094
     * Check for php-support.
2095
     *
2096
     * @param string|null $key
2097
     *
2098
     * @return mixed
2099
     *               Return the full support-"array", if $key === null<br>
2100
     *               return bool-value, if $key is used and available<br>
2101
     *               otherwise return <strong>null</strong>
2102
     */
2103 27
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            // Load the transliterator list on first use.
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }
            // compatibility fix for old versions
            self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

            // Missing (or null) entries are reported as null.
            return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
        }

        // No key given: return the support array as it currently is.
        return self::$SUPPORT;
    }
2117
2118
    /**
2119
     * Warning: this method only works for some file-types (png, jpg)
2120
     *          if you need more supported types, please use e.g. "finfo"
2121
     *
2122
     * @param string $str
2123
     * @param array  $fallback with these keys: 'ext', 'mime', 'type'
2124
     *
2125
     * @return array
2126
     *               with these keys: 'ext', 'mime', 'type'
2127
     */
2128 39
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        // The signature check needs the first two bytes.
        if (\strlen($str) < 2) {
            return $fallback;
        }

        // Decimal values of the first two bytes, glued together and read as one integer
        // (e.g. 0xFF 0xD8 => "255" . "216" => 255216).
        $type_code = (int) (\ord($str[0]) . \ord($str[1]));

        //
        // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
        //
        // WARNING: do not add too simple comparisons, because of false-positive results:
        //
        // 3780 => 'pdf', 7790 => 'exe', 7784 => 'midi', 8075 => 'zip',
        // 8297 => 'rar', 7173 => 'gif', 7373 => 'tiff' 6677 => 'bmp', ...
        //
        $known_types = [
            255216 => [
                'ext'  => 'jpg',
                'mime' => 'image/jpeg',
                'type' => 'binary',
            ],
            13780 => [
                'ext'  => 'png',
                'mime' => 'image/png',
                'type' => 'binary',
            ],
        ];

        return $known_types[$type_code] ?? $fallback;
    }
2191
2192
    /**
2193
     * @param int    $length         <p>Length of the random string.</p>
2194
     * @param string $possible_chars [optional] <p>Characters string for the random selection.</p>
2195
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
2196
     *
2197
     * @return string
2198
     */
2199 1
    public static function get_random_string(
        int $length,
        string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        string $encoding = 'UTF-8'
    ): string {
        // A literal 'UTF-8' uses mbstring directly; every other value is normalized
        // first and then goes through the class' own strlen() / substr().
        $use_mbstring = $encoding === 'UTF-8';
        if (!$use_mbstring) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $pool_size = $use_mbstring
            ? (int) \mb_strlen($possible_chars)
            : (int) self::strlen($possible_chars, $encoding);
        if ($pool_size === 0) {
            return '';
        }

        //
        // add random chars
        //

        $result = '';
        $picked = 0;
        while ($picked < $length) {
            try {
                $index = \random_int(0, $pool_size - 1);
            } catch (\Exception $e) {
                // random_int() failed, fall back to mt_rand()
                /** @noinspection RandomApiMigrationInspection */
                $index = \mt_rand(0, $pool_size - 1);
            }

            $char = $use_mbstring
                ? \mb_substr($possible_chars, $index, 1)
                : self::substr($possible_chars, $index, 1, $encoding);

            // A failed extraction does not count towards the requested length.
            if ($char === false) {
                continue;
            }

            $result .= $char;
            ++$picked;
        }

        return $result;
    }
2256
2257
    /**
2258
     * @param int|string $entropy_extra [optional] <p>Extra entropy via a string or int value.</p>
2259
     * @param bool       $use_md5       [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
2260
     *
2261
     * @return string
2262
     */
2263 1
    public static function get_unique_string($entropy_extra = '', bool $use_md5 = true): string
    {
        // Ingredients of the uniqid() prefix, in the order they are concatenated:
        // a random number, the session id, both address entries from $_SERVER
        // (empty when missing) and the caller supplied extra entropy.
        $seed = \implode(
            '',
            [
                \random_int(0, \mt_getrandmax()),
                \session_id(),
                $_SERVER['REMOTE_ADDR'] ?? '',
                $_SERVER['SERVER_ADDR'] ?? '',
                $entropy_extra,
            ]
        );

        $unique = \uniqid($seed, true);

        // optionally condense everything into a fixed-length md5 hex digest
        return $use_md5 ? \md5($unique . $seed) : $unique;
    }
2279
2280
    /**
2281
     * alias for "UTF8::string_has_bom()"
2282
     *
2283
     * @param string $str
2284
     *
2285
     * @return bool
2286
     *
2287
     * @see UTF8::string_has_bom()
2288
     * @deprecated <p>please use "UTF8::string_has_bom()"</p>
2289
     */
2290 2
    public static function hasBom(string $str): bool
    {
        // deprecated alias: the actual check lives in string_has_bom()
        $has_bom = self::string_has_bom($str);

        return $has_bom;
    }
2294
2295
    /**
2296
     * Returns true if the string contains a lower case char, false otherwise.
2297
     *
2298
     * @param string $str <p>The input string.</p>
2299
     *
2300
     * @return bool whether or not the string contains a lower case character
2301
     */
2302 47
    public static function has_lowercase(string $str): bool
    {
        // any run of characters followed by one lower case character
        $pattern = '.*[[:lower:]]';

        // without mbstring the class' own pattern matcher is used instead
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2311
2312
    /**
2313
     * Returns true if the string contains an upper case char, false otherwise.
2314
     *
2315
     * @param string $str <p>The input string.</p>
2316
     *
2317
     * @return bool whether or not the string contains an upper case character
2318
     */
2319 12
    public static function has_uppercase(string $str): bool
    {
        // any run of characters followed by one upper case character
        $pattern = '.*[[:upper:]]';

        // without mbstring the class' own pattern matcher is used instead
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2328
2329
    /**
2330
     * Converts a hexadecimal value into a UTF-8 character.
2331
     *
2332
     * @param string $hexdec <p>The hexadecimal value.</p>
2333
     *
2334
     * @return false|string one single UTF-8 character
2335
     */
2336 4
    public static function hex_to_chr(string $hexdec)
    {
        // turn the hex digits into a number first, then let decimal_to_chr() build the character
        $code_point = \hexdec($hexdec);

        return self::decimal_to_chr($code_point);
    }
2340
2341
    /**
2342
     * Converts hexadecimal U+xxxx code point representation to integer.
2343
     *
2344
     * INFO: opposite to UTF8::int_to_hex()
2345
     *
2346
     * @param string $hexdec <p>The hexadecimal code point representation.</p>
2347
     *
2348
     * @return false|int the code point, or false on failure
2349
     */
2350 2
    public static function hex_to_int($hexdec)
    {
        // the parameter is untyped, so work on its string form
        $hexdec = (string) $hexdec;

        if ($hexdec === '') {
            return false;
        }

        // Optional "\u" or "U+" prefix followed by 4 to 6 hexadecimal digits.
        // Only [0-9a-fA-F] is accepted: with letters beyond "f" intval() would stop
        // at the first invalid digit and return 0 or a truncated value instead of
        // reporting the failure.
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexdec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2365
2366
    /**
2367
     * alias for "UTF8::html_entity_decode()"
2368
     *
2369
     * @param string $str
2370
     * @param int    $flags
2371
     * @param string $encoding
2372
     *
2373
     * @return string
2374
     *
2375
     * @see UTF8::html_entity_decode()
2376
     * @deprecated <p>please use "UTF8::html_entity_decode()"</p>
2377
     */
2378 2
    public static function html_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // deprecated alias: all arguments are forwarded untouched
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2385
2386
    /**
2387
     * Converts a UTF-8 string to a series of HTML numbered entities.
2388
     *
2389
     * INFO: opposite to UTF8::html_decode()
2390
     *
2391
     * @param string $str              <p>The Unicode string to be encoded as numbered entities.</p>
2392
     * @param bool   $keep_ascii_chars [optional] <p>Keep ASCII chars.</p>
2393
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
2394
     *
2395
     * @return string HTML numbered entities
2396
     */
2397 14
    public static function html_encode(
        string $str,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // "UTF-8" and "CP850" are used as given, everything else is normalized
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // When ASCII is kept, only code points from 0x80 upwards are converted.
            $start_code = 0x00;
            if ($keep_ascii_chars === true) {
                $start_code = 0x80;
            }

            // The map is a list of 4-tuples: [first code point, last code point, offset, mask].
            // It must contain a multiple of four elements; PHP 8 raises a ValueError otherwise.
            $convmap = [$start_code, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                /** @var false|string|null $return - needed for PhpStan (stubs error) */
                $return = \mb_encode_numericentity(
                    $str,
                    $convmap
                );
                if ($return !== null && $return !== false) {
                    return $return;
                }
            }

            // other encodings, or a second attempt when the call above did not yield a string
            /** @var false|string|null $return - needed for PhpStan (stubs error) */
            $return = \mb_encode_numericentity(
                $str,
                $convmap,
                $encoding
            );
            if ($return !== null && $return !== false) {
                return $return;
            }
        }

        //
        // fallback via vanilla php: encode character by character
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keep_ascii_chars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2453
2454
    /**
2455
     * UTF-8 version of html_entity_decode()
2456
     *
2457
     * The reason we are not using html_entity_decode() by itself is because
2458
     * while it is not technically correct to leave out the semicolon
2459
     * at the end of an entity most browsers will still interpret the entity
2460
     * correctly. html_entity_decode() does not convert entities without
2461
     * semicolons, so we are left with our own little solution here. Bummer.
2462
     *
2463
     * Convert all HTML entities to their applicable characters
2464
     *
2465
     * INFO: opposite to UTF8::html_encode()
2466
     *
2467
     * @see http://php.net/manual/en/function.html-entity-decode.php
2468
     *
2469
     * @param string $str      <p>
2470
     *                         The input string.
2471
     *                         </p>
2472
     * @param int    $flags    [optional] <p>
2473
     *                         A bitmask of one or more of the following flags, which specify how to handle quotes
2474
     *                         and which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2475
     *                         <table>
2476
     *                         Available <i>flags</i> constants
2477
     *                         <tr valign="top">
2478
     *                         <td>Constant Name</td>
2479
     *                         <td>Description</td>
2480
     *                         </tr>
2481
     *                         <tr valign="top">
2482
     *                         <td><b>ENT_COMPAT</b></td>
2483
     *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2484
     *                         </tr>
2485
     *                         <tr valign="top">
2486
     *                         <td><b>ENT_QUOTES</b></td>
2487
     *                         <td>Will convert both double and single quotes.</td>
2488
     *                         </tr>
2489
     *                         <tr valign="top">
2490
     *                         <td><b>ENT_NOQUOTES</b></td>
2491
     *                         <td>Will leave both double and single quotes unconverted.</td>
2492
     *                         </tr>
2493
     *                         <tr valign="top">
2494
     *                         <td><b>ENT_HTML401</b></td>
2495
     *                         <td>
2496
     *                         Handle code as HTML 4.01.
2497
     *                         </td>
2498
     *                         </tr>
2499
     *                         <tr valign="top">
2500
     *                         <td><b>ENT_XML1</b></td>
2501
     *                         <td>
2502
     *                         Handle code as XML 1.
2503
     *                         </td>
2504
     *                         </tr>
2505
     *                         <tr valign="top">
2506
     *                         <td><b>ENT_XHTML</b></td>
2507
     *                         <td>
2508
     *                         Handle code as XHTML.
2509
     *                         </td>
2510
     *                         </tr>
2511
     *                         <tr valign="top">
2512
     *                         <td><b>ENT_HTML5</b></td>
2513
     *                         <td>
2514
     *                         Handle code as HTML 5.
2515
     *                         </td>
2516
     *                         </tr>
2517
     *                         </table>
2518
     *                         </p>
2519
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2520
     *
2521
     * @return string the decoded string
2522
     */
2523 46
    public static function html_entity_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // Shortcut: fewer than four bytes or no ampersand at all means there is
        // nothing that could be decoded.
        if (
            !isset($str[3]) // examples: &; || &x;
            ||
            \strpos($str, '&') === false // no "&"
        ) {
            return $str;
        }

        // "UTF-8" and "CP850" are used as given, everything else is normalized
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // The map is a list of 4-tuples: [first code point, last code point, offset, mask].
        // It must contain a multiple of four elements; PHP 8 raises a ValueError otherwise.
        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        $convmap = [0x80, 0xfffff, 0, 0xfffff];

        // Repeat until a pass changes nothing, so that nested encodings are resolved as well.
        do {
            $str_compare = $str;

            if (self::$SUPPORT['mbstring'] === true) {
                if ($encoding === 'UTF-8') {
                    /** @var false|string|null $strTmp - needed for PhpStan (stubs error) */
                    $strTmp = \mb_decode_numericentity(
                        $str,
                        $convmap
                    );
                } else {
                    /** @var false|string|null $strTmp - needed for PhpStan (stubs error) */
                    $strTmp = \mb_decode_numericentity(
                        $str,
                        $convmap,
                        $encoding
                    );
                }

                // NOTE(review): a successful $strTmp is not written back to $str; the
                // mbstring result only decides whether the helper runs. The numeric
                // entities are decoded by html_entity_decode() below - confirm this is intended.
                if ($strTmp === null || $strTmp === false) {
                    $str = self::html_entity_decode_helper($str, $encoding);
                }
            } else {
                $str = self::html_entity_decode_helper($str, $encoding);
            }

            if (\strpos($str, '&') !== false) {
                if (\strpos($str, '&#') !== false) {
                    // decode also numeric & UTF16 two byte entities:
                    // append the missing ";" to numeric entities that lack one
                    $str = (string) \preg_replace(
                        '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                        '$1;',
                        $str
                    );
                }

                $str = \html_entity_decode(
                    $str,
                    $flags,
                    $encoding
                );
            }
        } while ($str_compare !== $str);

        return $str;
    }
2602
2603
    /**
2604
     * Create an HTML-escaped version of the string via "UTF8::htmlspecialchars()".
2605
     *
2606
     * @param string $str
2607
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2608
     *
2609
     * @return string
2610
     */
2611 6
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        // convert both quote styles and substitute invalid code unit sequences
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }
2619
2620
    /**
2621
     * Remove empty html-tag.
2622
     *
2623
     * e.g.: <tag></tag>
2624
     *
2625
     * @param string $str
2626
     *
2627
     * @return string
2628
     */
2629 1
    public static function html_stripe_empty_tags(string $str): string
    {
        // an opening tag, optional whitespace and a closing tag - with nothing else in between
        $stripped = \preg_replace(
            '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u',
            '',
            $str
        );

        // preg_replace() returns null on a PCRE error (e.g. invalid UTF-8 together
        // with the "u" modifier); hand back the input instead of silently emptying it.
        return $stripped ?? $str;
    }
2637
2638
    /**
2639
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2640
     *
2641
     * @see http://php.net/manual/en/function.htmlentities.php
2642
     *
2643
     * @param string $str           <p>
2644
     *                              The input string.
2645
     *                              </p>
2646
     * @param int    $flags         [optional] <p>
2647
     *                              A bitmask of one or more of the following flags, which specify how to handle
2648
     *                              quotes, invalid code unit sequences and the used document type. The default is
2649
     *                              ENT_COMPAT | ENT_HTML401.
2650
     *                              <table>
2651
     *                              Available <i>flags</i> constants
2652
     *                              <tr valign="top">
2653
     *                              <td>Constant Name</td>
2654
     *                              <td>Description</td>
2655
     *                              </tr>
2656
     *                              <tr valign="top">
2657
     *                              <td><b>ENT_COMPAT</b></td>
2658
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2659
     *                              </tr>
2660
     *                              <tr valign="top">
2661
     *                              <td><b>ENT_QUOTES</b></td>
2662
     *                              <td>Will convert both double and single quotes.</td>
2663
     *                              </tr>
2664
     *                              <tr valign="top">
2665
     *                              <td><b>ENT_NOQUOTES</b></td>
2666
     *                              <td>Will leave both double and single quotes unconverted.</td>
2667
     *                              </tr>
2668
     *                              <tr valign="top">
2669
     *                              <td><b>ENT_IGNORE</b></td>
2670
     *                              <td>
2671
     *                              Silently discard invalid code unit sequences instead of returning
2672
     *                              an empty string. Using this flag is discouraged as it
2673
     *                              may have security implications.
2674
     *                              </td>
2675
     *                              </tr>
2676
     *                              <tr valign="top">
2677
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2678
     *                              <td>
2679
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2680
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
2681
     *                              string.
2682
     *                              </td>
2683
     *                              </tr>
2684
     *                              <tr valign="top">
2685
     *                              <td><b>ENT_DISALLOWED</b></td>
2686
     *                              <td>
2687
     *                              Replace invalid code points for the given document type with a
2688
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2689
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2690
     *                              instance, to ensure the well-formedness of XML documents with
2691
     *                              embedded external content.
2692
     *                              </td>
2693
     *                              </tr>
2694
     *                              <tr valign="top">
2695
     *                              <td><b>ENT_HTML401</b></td>
2696
     *                              <td>
2697
     *                              Handle code as HTML 4.01.
2698
     *                              </td>
2699
     *                              </tr>
2700
     *                              <tr valign="top">
2701
     *                              <td><b>ENT_XML1</b></td>
2702
     *                              <td>
2703
     *                              Handle code as XML 1.
2704
     *                              </td>
2705
     *                              </tr>
2706
     *                              <tr valign="top">
2707
     *                              <td><b>ENT_XHTML</b></td>
2708
     *                              <td>
2709
     *                              Handle code as XHTML.
2710
     *                              </td>
2711
     *                              </tr>
2712
     *                              <tr valign="top">
2713
     *                              <td><b>ENT_HTML5</b></td>
2714
     *                              <td>
2715
     *                              Handle code as HTML 5.
2716
     *                              </td>
2717
     *                              </tr>
2718
     *                              </table>
2719
     *                              </p>
2720
     * @param string $encoding      [optional] <p>
2721
     *                              Like <b>htmlspecialchars</b>,
2722
     *                              <b>htmlentities</b> takes an optional third argument
2723
     *                              <i>encoding</i> which defines encoding used in
2724
     *                              conversion.
2725
     *                              Although this argument is technically optional, you are highly
2726
     *                              encouraged to specify the correct value for your code.
2727
     *                              </p>
2728
     * @param bool   $double_encode [optional] <p>
2729
     *                              When <i>double_encode</i> is turned off PHP will not
2730
     *                              encode existing html entities. The default is to convert everything.
2731
     *                              </p>
2732
     *
2733
     * @return string
2734
     *                <p>
2735
     *                The encoded string.
2736
     *                <br><br>
2737
     *                If the input <i>string</i> contains an invalid code unit
2738
     *                sequence within the given <i>encoding</i> an empty string
2739
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2740
     *                <b>ENT_SUBSTITUTE</b> flags are set.
2741
     *                </p>
2742
     */
2743 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used as given, everything else is normalized
        $is_known_encoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        /**
         * The native htmlentities() leaves backslashes alone, so they are turned
         * into their numeric entity here, right after the native pass.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace(
            '\\',
            '&#92;',
            \htmlentities($str, $flags, $encoding, $double_encode)
        );

        // finally convert the remaining non-ASCII characters into numbered entities
        return self::html_encode($encoded, true, $encoding);
    }
2772
2773
    /**
2774
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2775
     *
2776
     * INFO: Take a look at "UTF8::htmlentities()"
2777
     *
2778
     * @see http://php.net/manual/en/function.htmlspecialchars.php
2779
     *
2780
     * @param string $str           <p>
2781
     *                              The string being converted.
2782
     *                              </p>
2783
     * @param int    $flags         [optional] <p>
2784
     *                              A bitmask of one or more of the following flags, which specify how to handle
2785
     *                              quotes, invalid code unit sequences and the used document type. The default is
2786
     *                              ENT_COMPAT | ENT_HTML401.
2787
     *                              <table>
2788
     *                              Available <i>flags</i> constants
2789
     *                              <tr valign="top">
2790
     *                              <td>Constant Name</td>
2791
     *                              <td>Description</td>
2792
     *                              </tr>
2793
     *                              <tr valign="top">
2794
     *                              <td><b>ENT_COMPAT</b></td>
2795
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2796
     *                              </tr>
2797
     *                              <tr valign="top">
2798
     *                              <td><b>ENT_QUOTES</b></td>
2799
     *                              <td>Will convert both double and single quotes.</td>
2800
     *                              </tr>
2801
     *                              <tr valign="top">
2802
     *                              <td><b>ENT_NOQUOTES</b></td>
2803
     *                              <td>Will leave both double and single quotes unconverted.</td>
2804
     *                              </tr>
2805
     *                              <tr valign="top">
2806
     *                              <td><b>ENT_IGNORE</b></td>
2807
     *                              <td>
2808
     *                              Silently discard invalid code unit sequences instead of returning
2809
     *                              an empty string. Using this flag is discouraged as it
2810
     *                              may have security implications.
2811
     *                              </td>
2812
     *                              </tr>
2813
     *                              <tr valign="top">
2814
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2815
     *                              <td>
2816
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2817
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
2818
     *                              string.
2819
     *                              </td>
2820
     *                              </tr>
2821
     *                              <tr valign="top">
2822
     *                              <td><b>ENT_DISALLOWED</b></td>
2823
     *                              <td>
2824
     *                              Replace invalid code points for the given document type with a
2825
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2826
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2827
     *                              instance, to ensure the well-formedness of XML documents with
2828
     *                              embedded external content.
2829
     *                              </td>
2830
     *                              </tr>
2831
     *                              <tr valign="top">
2832
     *                              <td><b>ENT_HTML401</b></td>
2833
     *                              <td>
2834
     *                              Handle code as HTML 4.01.
2835
     *                              </td>
2836
     *                              </tr>
2837
     *                              <tr valign="top">
2838
     *                              <td><b>ENT_XML1</b></td>
2839
     *                              <td>
2840
     *                              Handle code as XML 1.
2841
     *                              </td>
2842
     *                              </tr>
2843
     *                              <tr valign="top">
2844
     *                              <td><b>ENT_XHTML</b></td>
2845
     *                              <td>
2846
     *                              Handle code as XHTML.
2847
     *                              </td>
2848
     *                              </tr>
2849
     *                              <tr valign="top">
2850
     *                              <td><b>ENT_HTML5</b></td>
2851
     *                              <td>
2852
     *                              Handle code as HTML 5.
2853
     *                              </td>
2854
     *                              </tr>
2855
     *                              </table>
2856
     *                              </p>
2857
     * @param string $encoding      [optional] <p>
2858
     *                              Defines encoding used in conversion.
2859
     *                              </p>
2860
     *                              <p>
2861
     *                              For the purposes of this function, the encodings
2862
     *                              ISO-8859-1, ISO-8859-15,
2863
     *                              UTF-8, cp866,
2864
     *                              cp1251, cp1252, and
2865
     *                              KOI8-R are effectively equivalent, provided the
2866
     *                              <i>string</i> itself is valid for the encoding, as
2867
     *                              the characters affected by <b>htmlspecialchars</b> occupy
2868
     *                              the same positions in all of these encodings.
2869
     *                              </p>
2870
     * @param bool   $double_encode [optional] <p>
2871
     *                              When <i>double_encode</i> is turned off PHP will not
2872
     *                              encode existing html entities, the default is to convert everything.
2873
     *                              </p>
2874
     *
2875
     * @return string the converted string.
2876
     *                </p>
2877
     *                <p>
2878
     *                If the input <i>string</i> contains an invalid code unit
2879
     *                sequence within the given <i>encoding</i> an empty string
2880
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2881
     *                <b>ENT_SUBSTITUTE</b> flags are set
2882
     */
2883 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used as given, everything else is normalized
        $needs_normalizing = !($encoding === 'UTF-8' || $encoding === 'CP850');
        if ($needs_normalizing) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // the conversion itself is done by the native function
        $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $converted;
    }
2900
2901
    /**
2902
     * Checks whether iconv is available on the server.
2903
     *
2904
     * @return bool
2905
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2906
     */
2907
    public static function iconv_loaded(): bool
    {
        // ask the engine whether the "iconv" extension is loaded
        $is_available = \extension_loaded('iconv');

        return $is_available;
    }
2911
2912
    /**
     * Deprecated alias that forwards to "UTF8::decimal_to_chr()".
     *
     * @param mixed $int <p>Passed through unchanged to "UTF8::decimal_to_chr()".</p>
     *
     * @return string
     *
     * @see UTF8::decimal_to_chr()
     * @deprecated <p>please use "UTF8::decimal_to_chr()"</p>
     */
    public static function int_to_chr($int): string
    {
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
2926
2927
    /**
     * Converts an integer to its hexadecimal code point notation, e.g. 65 => "U+0041".
     *
     * INFO: opposite to UTF8::hex_to_int()
     *
     * @param int    $int    <p>The integer to be converted to hexadecimal code point.</p>
     * @param string $prefix [optional] <p>Text placed in front of the hex digits, "U+" by default.</p>
     *
     * @return string
     *                <p>The prefix followed by the hex digits, left-padded with zeros to at least four digits.</p>
     */
    public static function int_to_hex(int $int, string $prefix = 'U+'): string
    {
        // values with fewer than four hex digits are zero-padded on the left
        $digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $prefix . $digits;
    }
2945
2946
    /**
     * Reports whether the "IntlChar" class exists.
     *
     * @return bool
     *              <strong>true</strong> if the class is available, <strong>false</strong> otherwise
     */
    public static function intlChar_loaded(): bool
    {
        $is_available = \class_exists('IntlChar');

        return $is_available;
    }
2956
2957
    /**
     * Reports whether the "intl" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise
     */
    public static function intl_loaded(): bool
    {
        $is_loaded = \extension_loaded('intl');

        return $is_loaded;
    }
2967
2968
    /**
     * Deprecated alias for "UTF8::is_ascii()"; the check itself is done by "ASCII::is_ascii()".
     *
     * @param string $str <p>The string to check.</p>
     *
     * @return bool
     *
     * @see UTF8::is_ascii()
     * @deprecated <p>please use "UTF8::is_ascii()"</p>
     */
    public static function isAscii(string $str): bool
    {
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
2982
2983
    /**
     * Deprecated alias that forwards to "UTF8::is_base64()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *
     * @see UTF8::is_base64()
     * @deprecated <p>please use "UTF8::is_base64()"</p>
     */
    public static function isBase64($str): bool
    {
        $is_base64 = self::is_base64($str);

        return $is_base64;
    }
2997
2998
    /**
     * Deprecated alias that forwards both arguments to "UTF8::is_binary()".
     *
     * @param mixed $str    <p>The input to check.</p>
     * @param bool  $strict <p>Passed through as the strict flag of "UTF8::is_binary()".</p>
     *
     * @return bool
     *
     * @see UTF8::is_binary()
     * @deprecated <p>please use "UTF8::is_binary()"</p>
     */
    public static function isBinary($str, $strict = false): bool
    {
        $is_binary = self::is_binary($str, $strict);

        return $is_binary;
    }
3013
3014
    /**
     * Deprecated alias that forwards to "UTF8::is_bom()".
     *
     * @param string $utf8_chr <p>The input string.</p>
     *
     * @return bool
     *
     * @see UTF8::is_bom()
     * @deprecated <p>please use "UTF8::is_bom()"</p>
     */
    public static function isBom(string $utf8_chr): bool
    {
        $is_bom = self::is_bom($utf8_chr);

        return $is_bom;
    }
3028
3029
    /**
     * Deprecated alias that forwards to "UTF8::is_html()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *
     * @see UTF8::is_html()
     * @deprecated <p>please use "UTF8::is_html()"</p>
     */
    public static function isHtml(string $str): bool
    {
        $is_html = self::is_html($str);

        return $is_html;
    }
3043
3044
    /**
     * Deprecated alias that forwards to "UTF8::is_json()" with its default options.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *
     * @see UTF8::is_json()
     * @deprecated <p>please use "UTF8::is_json()"</p>
     */
    public static function isJson(string $str): bool
    {
        $is_json = self::is_json($str);

        return $is_json;
    }
3058
3059
    /**
     * Deprecated alias that forwards to "UTF8::is_utf16()" with its default options.
     *
     * @param mixed $str <p>The input string.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     *
     * @see UTF8::is_utf16()
     * @deprecated <p>please use "UTF8::is_utf16()"</p>
     */
    public static function isUtf16($str)
    {
        $result = self::is_utf16($str);

        return $result;
    }
3076
3077
    /**
     * Deprecated alias that forwards to "UTF8::is_utf32()" with its default options.
     *
     * @param mixed $str <p>The input string.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     *
     * @see UTF8::is_utf32()
     * @deprecated <p>please use "UTF8::is_utf32()"</p>
     */
    public static function isUtf32($str)
    {
        $result = self::is_utf32($str);

        return $result;
    }
3094
3095
    /**
     * Deprecated alias that forwards both arguments to "UTF8::is_utf8()".
     *
     * @param string $str    <p>The input to be checked.</p>
     * @param bool   $strict <p>Passed through as the strict flag of "UTF8::is_utf8()".</p>
     *
     * @return bool
     *
     * @see UTF8::is_utf8()
     * @deprecated <p>please use "UTF8::is_utf8()"</p>
     */
    public static function isUtf8($str, $strict = false): bool
    {
        $is_utf8 = self::is_utf8($str, $strict);

        return $is_utf8;
    }
3110
3111
    /**
     * Checks whether the string consists solely of alphabetic chars.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only alphabetic chars
     */
    public static function is_alpha(string $str): bool
    {
        $pattern = '^[[:alpha:]]*$';

        // without mbstring support the class' own pattern matcher is used
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3128
3129
    /**
     * Checks whether the string consists solely of alphabetic and numeric chars.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only alphanumeric chars
     */
    public static function is_alphanumeric(string $str): bool
    {
        $pattern = '^[[:alnum:]]*$';

        // without mbstring support the class' own pattern matcher is used
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3146
3147
    /**
     * Checks if a string is 7 bit ASCII; the check is delegated to "ASCII::is_ascii()".
     *
     * @param string $str <p>The string to check.</p>
     *
     * @return bool
     *              <strong>true</strong> if it is ASCII<br>
     *              <strong>false</strong> otherwise
     */
    public static function is_ascii(string $str): bool
    {
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
3160
3161
    /**
     * Checks whether the input is a base64 encoded string.
     *
     * The input is decoded in strict mode and encoded again; it only counts
     * as base64 when that round trip reproduces the input exactly.
     *
     * @param mixed|string $str                   <p>The input string; non-string input is never valid.</p>
     * @param bool         $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p>
     *
     * @return bool whether or not $str is base64 encoded
     */
    public static function is_base64($str, $empty_string_is_valid = false): bool
    {
        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (\is_string($str) === false) {
            return false;
        }

        // the empty string is rejected unless the caller explicitly allows it
        if ($str === '' && $empty_string_is_valid === false) {
            return false;
        }

        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        return \base64_encode($decoded) === $str;
    }
3190
3191
    /**
     * Heuristically checks whether the input looks like binary data.
     *
     * The input is cast to string and reported as binary when one of these
     * checks hits, in this order:
     * - it consists only of "0" and "1" chars
     * - "UTF8::get_file_type()" reports the type "binary"
     * - more than 25% of its bytes are NUL bytes
     * - only with $strict: finfo reports the MIME encoding "binary"
     *
     * An empty input is never binary.
     *
     * @param mixed $input  <p>The input to check; it is cast to string.</p>
     * @param bool  $strict <p>Additionally ask ext-fileinfo for the MIME encoding.</p>
     *
     * @throws \RuntimeException if $strict is used and ext-fileinfo is not available
     *
     * @return bool
     */
    public static function is_binary($input, bool $strict = false): bool
    {
        $input = (string) $input;

        if ($input === '') {
            return false;
        }

        // a pure "0" / "1" string is treated as a binary representation
        if (\preg_match('~^[01]+$~', $input)) {
            return true;
        }

        $file_type = self::get_file_type($input);
        if ($file_type['type'] === 'binary') {
            return true;
        }

        // a high share of NUL bytes is taken as a sign of binary content
        $byte_length = \strlen($input);
        $null_byte_count = \substr_count($input, "\x0", 0, $byte_length);
        if (($null_byte_count / $byte_length) > 0.25) {
            return true;
        }

        if ($strict !== true) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $finfo_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

        return $finfo_encoding === 'binary';
    }
3235
3236
    /**
     * Checks whether a file looks binary, judged by at most its first 512 bytes.
     *
     * The block that was read is handed to "UTF8::is_binary()" in strict mode.
     * A file that cannot be opened, or that yields an empty block, is reported
     * as not binary.
     *
     * @param string $file <p>The file name handed to fopen().</p>
     *
     * @return bool
     */
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (\is_resource($handle) === false) {
            return false;
        }

        $first_block = \fread($handle, 512);
        \fclose($handle);

        if ($first_block === '') {
            return false;
        }

        return self::is_binary($first_block, true);
    }
3260
3261
    /**
     * Checks whether the string consists solely of whitespace chars.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only whitespace characters
     */
    public static function is_blank(string $str): bool
    {
        $pattern = '^[[:space:]]*$';

        // without mbstring support the class' own pattern matcher is used
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3278
3279
    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * The input is compared strictly against every key of self::$BOM, so it
     * must be exactly a BOM, not merely start with one.
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise
     */
    public static function is_bom($str): bool
    {
        // iterate by value: nothing is modified, and a by-reference loop
        // would turn the entries of the shared static array into references
        /** @noinspection PhpUnusedLocalVariableInspection */
        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            if ($str === $bom_string) {
                return true;
            }
        }

        return false;
    }
3300
3301
    /**
     * Determine whether the given value is considered to be empty.
     *
     * A value is considered empty if it equals FALSE when converted to bool,
     * e.g. "", "0", 0, null, false or an empty array.
     *
     * @param mixed $str
     *
     * @return bool whether or not $str is empty()
     */
    public static function is_empty($str): bool
    {
        // the parameter is always set, so empty() boils down to a negated bool cast
        return !$str;
    }
3315
3316
    /**
     * Checks whether the string consists solely of hexadecimal chars.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only hexadecimal chars
     */
    public static function is_hexadecimal(string $str): bool
    {
        $pattern = '^[[:xdigit:]]*$';

        // without mbstring support the class' own pattern matcher is used
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3333
3334
    /**
     * Check if the string contains any HTML tags.
     *
     * The string is first passed through "UTF8::emoji_encode()" and then
     * searched for an opening or closing tag.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if a tag was found, <strong>false</strong> otherwise (also for an empty string)
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        $encoded = self::emoji_encode($str); // hack for emoji support :/

        $tag_pattern = "/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u";

        // preg_match() gives 1 on a hit, 0 on no hit and false on error
        return \preg_match($tag_pattern, $encoded) === 1;
    }
3356
3357
    /**
     * Try to check if "$str" is a JSON-string.
     *
     * The string is decoded via "UTF8::json_decode()"; it is accepted when the
     * decoding reported no error and, unless disabled, the decoded value is an
     * array or an object.
     *
     * @param string $str                                    <p>The input string.</p>
     * @param bool   $only_array_or_object_results_are_valid [optional] <p>Only array and objects are valid json results.</p>
     *
     * @throws \RuntimeException if ext-json is not available
     *
     * @return bool
     */
    public static function is_json(
        string $str,
        $only_array_or_object_results_are_valid = true
    ): bool {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // a null result is only legitimate for the JSON literal "null" (any case)
        if ($decoded === null && \strtoupper($str) !== 'NULL') {
            return false;
        }

        if ($only_array_or_object_results_are_valid === true) {
            $is_container = \is_object($decoded) || \is_array($decoded);
            if (!$is_container) {
                return false;
            }
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3395
3396
    /**
     * Checks whether the string consists solely of lower case chars.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not $str contains only lower case characters.</p>
     */
    public static function is_lowercase(string $str): bool
    {
        $pattern = '^[[:lower:]]*$';

        // without mbstring support the class' own pattern matcher is used
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3410
3411
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The check works by actually unserializing the string. Object
     * instantiation is disabled for that ("allowed_classes" => false), so
     * probing an untrusted string cannot create instances of arbitrary classes.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not $str is serialized
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // unserialize() returns false on failure AND for a serialized "false",
        // so that one literal has to be recognised up front
        if ($str === 'b:0;') {
            return true;
        }

        // "@" is deliberate: unserialize() raises a notice for non-serialized input
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        /** @noinspection UnserializeExploitsInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3430
3431
    /**
     * Returns true if the string contains only upper case chars, false
     * otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not $str contains only upper case characters.</p>
     */
    public static function is_uppercase(string $str): bool
    {
        $pattern = '^[[:upper:]]*$';

        // without mbstring support the class' own pattern matcher is used
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3449
3450
    /**
     * Check if the string is UTF-16.
     *
     * Heuristic: after removing a BOM, the string is interpreted once as
     * UTF-16LE and once as UTF-16BE. For each byte order it is converted to
     * UTF-8, back, and to UTF-8 again; when that round trip is stable, the
     * keys of "count_chars()" of the converted text are looked up in
     * "count_chars($str, true, false)" and every hit scores a point. The byte
     * order with more points wins; on a tie the string is not considered UTF-16.
     *
     * @param mixed $str                       <p>The input string.</p>
     * @param bool  $check_if_string_is_binary <p>Return false right away if "UTF8::is_binary()" (strict) does not see binary data.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        if (
            $check_if_string_is_binary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // one score per byte order, both computed by the same round-trip test
        $scores = ['UTF-16LE' => 0, 'UTF-16BE' => 0];
        foreach (['UTF-16LE', 'UTF-16BE'] as $encoding) {
            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            // computed lazily and reused for the second byte order
            if (\count($str_chars) === 0) {
                $str_chars = self::count_chars($str, true, false);
            }

            // only the keys are needed; iterate by value, since the result of
            // a function call is a temporary and no valid reference target
            /** @noinspection PhpUnusedLocalVariableInspection */
            foreach (self::count_chars($test3) as $test3char => $test3char_count) {
                if (\in_array($test3char, $str_chars, true) === true) {
                    ++$scores[$encoding];
                }
            }
        }

        if ($scores['UTF-16BE'] !== $scores['UTF-16LE']) {
            return $scores['UTF-16LE'] > $scores['UTF-16BE'] ? 1 : 2;
        }

        return false;
    }
3527
3528
    /**
     * Check if the string is UTF-32.
     *
     * Heuristic: after removing a BOM, the string is interpreted once as
     * UTF-32LE and once as UTF-32BE. For each byte order it is converted to
     * UTF-8, back, and to UTF-8 again; when that round trip is stable, the
     * keys of "count_chars()" of the converted text are looked up in
     * "count_chars($str, true, false)" and every hit scores a point. The byte
     * order with more points wins; on a tie the string is not considered UTF-32.
     *
     * @param mixed $str                       <p>The input string.</p>
     * @param bool  $check_if_string_is_binary <p>Return false right away if "UTF8::is_binary()" (strict) does not see binary data.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        if (
            $check_if_string_is_binary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // one score per byte order, both computed by the same round-trip test
        $scores = ['UTF-32LE' => 0, 'UTF-32BE' => 0];
        foreach (['UTF-32LE', 'UTF-32BE'] as $encoding) {
            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            // computed lazily and reused for the second byte order
            if (\count($str_chars) === 0) {
                $str_chars = self::count_chars($str, true, false);
            }

            // only the keys are needed; iterate by value, since the result of
            // a function call is a temporary and no valid reference target
            /** @noinspection PhpUnusedLocalVariableInspection */
            foreach (self::count_chars($test3) as $test3char => $test3char_count) {
                if (\in_array($test3char, $str_chars, true) === true) {
                    ++$scores[$encoding];
                }
            }
        }

        if ($scores['UTF-32BE'] !== $scores['UTF-32LE']) {
            return $scores['UTF-32LE'] > $scores['UTF-32BE'] ? 1 : 2;
        }

        return false;
    }
3605
3606
    /**
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
     *
     * Arrays are checked element by element (nested arrays are handled through
     * recursion); the check stops at the first element that fails.
     *
     * @param int|string|string[]|null $str    <p>The input to be checked.</p>
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     *              <strong>true</strong> if every checked value passed, <strong>false</strong> otherwise
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str) === true) {
            // Iterate by value: nothing is modified here, so a by-reference loop would
            // only leave a dangling reference behind without any benefit.
            foreach ($str as $v) {
                if (self::is_utf8($v, $strict) === false) {
                    return false;
                }
            }

            return true;
        }

        // everything else is cast to a string and handed to the string-only check
        return self::is_utf8_string((string) $str, $strict);
    }
3628
3629
    /**
     * Decodes a JSON string.
     *
     * The input is passed through "UTF8::filter()" first and then handed to
     * the native "json_decode()" function.
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>The <i>json</i> string being decoded.</p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>User specified recursion depth.</p>
     * @param int    $options [optional] <p>Bitmask of JSON decode options.</p>
     *
     * @throws \RuntimeException if the json extension is flagged as unavailable
     *
     * @return mixed
     *               The value encoded in <i>json</i> in appropriate PHP type, exactly as
     *               returned by the native "json_decode()" function.
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        $filtered_json = self::filter($json);

        $json_is_missing = self::$SUPPORT['json'] === false;
        if ($json_is_missing) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $decoded = \json_decode($filtered_json, $assoc, $depth, $options);

        return $decoded;
    }
3679
3680
    /**
     * Returns the JSON representation of a value.
     *
     * The value is passed through "UTF8::filter()" first and then handed to
     * the native "json_encode()" function.
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>The <i>value</i> being encoded.</p>
     * @param int   $options [optional] <p>
     *                       Bitmask of JSON encode options, e.g. <b>JSON_PRETTY_PRINT</b>
     *                       or <b>JSON_UNESCAPED_UNICODE</b>.
     *                       </p>
     * @param int   $depth   [optional] <p>Set the maximum depth. Must be greater than zero.</p>
     *
     * @throws \RuntimeException if the json extension is flagged as unavailable
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filtered_value = self::filter($value);

        $json_is_missing = self::$SUPPORT['json'] === false;
        if ($json_is_missing) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $encoded = \json_encode($filtered_value, $options, $depth);

        return $encoded;
    }
3729
3730
    /**
     * Checks whether JSON is available on the server.
     *
     * The check is based on the existence of the native "json_decode()" function.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function json_loaded(): bool
    {
        $has_json_decode = \function_exists('json_decode');

        return $has_json_decode;
    }
3740
3741
    /**
     * Makes string's first char lowercase.
     *
     * For "UTF-8" without language / length options the native "mb_strtolower()" is used
     * for the first character; in every other case "UTF8::strtolower()" is used.
     *
     * @param string      $str                           <p>The input string</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        // non UTF-8 input: normalize the encoding name and use the encoding-aware helpers
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $tail = (string) self::substr($str, 1, null, $encoding);
            $head = self::strtolower(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );

            return $head . $tail;
        }

        $tail = (string) \mb_substr($str, 1);
        $first_char = (string) \mb_substr($str, 0, 1);

        // fast path: no language specific rules and no length preservation requested
        if ($lang === null && $try_to_keep_the_string_length === false) {
            return \mb_strtolower($first_char) . $tail;
        }

        $head = self::strtolower(
            $first_char,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        );

        return $head . $tail;
    }
3797
3798
    /**
     * alias for "UTF8::lcfirst()"
     *
     * All arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @return string
     *
     * @see UTF8::lcfirst()
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $result = self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
3827
3828
    /**
     * Lowercase for all words in the string.
     *
     * The string is split via "UTF8::str_to_words()", the first character of every
     * non-excluded word is lower-cased via "UTF8::lcfirst()" and the parts are joined again.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string[]    $exceptions                    [optional] <p>Exclusion for some words.</p>
     * @param string      $char_list                     [optional] <p>Additional chars that contains to words and do not start
     *                                                   a new word.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Compare against '' explicitly: a loose truthiness check would also
        // swallow the valid input "0" and return an empty string for it.
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = \count($exceptions) > 0;

        // Write back via the key instead of iterating by reference, so no
        // dangling reference to the last element is left behind.
        foreach ($words as $index => $word) {
            if ($word === '') {
                continue;
            }

            if (
                $use_exceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words[$index] = self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
            }
        }

        return \implode('', $words);
    }
3874
3875
    /**
     * alias for "UTF8::lcfirst()"
     *
     * All arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @return string
     *
     * @see UTF8::lcfirst()
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $result = self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
3904
3905
    /**
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
     *
     * If no character list is given (null or empty string), whitespace is stripped.
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped</p>
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Explicit comparison instead of a truthiness test: otherwise the character
        // list "0" would be ignored and whitespace would be stripped instead.
        if ($chars !== null && $chars !== '') {
            $chars = \preg_quote($chars, '/');
            // "{$var}" syntax, because "${var}" interpolation is deprecated as of PHP 8.2
            $pattern = "^[{$chars}]+";
        } else {
            $pattern = '^[\\s]+';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // fallback without mbstring
        return self::regex_replace($str, $pattern, '', '', '/');
    }
3933
3934
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * An array input is concatenated into one string before the code points are collected.
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @return string|null the character with the highest code point than others, returns null on failure or empty input
     */
    public static function max($arg)
    {
        $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $code_points = self::codepoints($input, false);

        // no code points at all: nothing to compare
        return \count($code_points) === 0 ? null : self::chr(\max($code_points));
    }
3956
3957
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @return int max byte lengths of the given chars, 0 if "UTF8::chr_size_list()" returns an empty list
     */
    public static function max_chr_width(string $str): int
    {
        $byte_lengths = self::chr_size_list($str);

        return \count($byte_lengths) > 0 ? (int) \max($byte_lengths) : 0;
    }
3974
3975
    /**
     * Checks whether mbstring is available on the server.
     *
     * The check asks PHP whether the "mbstring" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function mbstring_loaded(): bool
    {
        $is_loaded = \extension_loaded('mbstring');

        return $is_loaded;
    }
3985
3986
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * An array input is concatenated into one string before the code points are collected.
     *
     * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @return string|null the character with the lowest code point than others, returns null on failure or empty input
     */
    public static function min($arg)
    {
        $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $code_points = self::codepoints($input, false);

        // no code points at all: nothing to compare
        return \count($code_points) === 0 ? null : self::chr(\min($code_points));
    }
4008
4009
    /**
     * alias for "UTF8::normalize_encoding()"
     *
     * Both arguments are forwarded unchanged.
     *
     * @param mixed $encoding
     * @param mixed $fallback
     *
     * @return mixed
     *
     * @see UTF8::normalize_encoding()
     * @deprecated <p>please use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4024
4025
    /**
     * Normalize the encoding-"name" input.
     *
     * Resolution order: empty input -> fallback, a few hard-coded names, the per-request
     * static cache, the list of known encodings, and finally the alias table below
     * (matched against the upper-cased name stripped of all non-alphanumeric chars).
     * Names that match nothing are returned upper-cased.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        // init
        $encoding = (string) $encoding;

        // empty input; this also covers the string "0", which is falsy in PHP
        if (!$encoding) {
            return $fallback;
        }

        if (
            $encoding === 'UTF-8'
            ||
            $encoding === 'UTF8'
        ) {
            return 'UTF-8';
        }

        if (
            $encoding === '8BIT'
            ||
            $encoding === 'BINARY'
        ) {
            return 'CP850';
        }

        if (
            $encoding === 'HTML'
            ||
            $encoding === 'HTML-ENTITIES'
        ) {
            return 'HTML-ENTITIES';
        }

        // only a fallback, for non "strict_types" usage ... ("0" was already handled above)
        if ($encoding === '1') {
            return $fallback;
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        // lazy-load the list of known encoding names
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        $encoding_original = $encoding;
        $encoding = \strtoupper($encoding);
        $encoding_upper_helper = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $encoding);

        $equivalences = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // every value in the table is a non-empty string, so "isset" is sufficient here
        if (isset($equivalences[$encoding_upper_helper])) {
            $encoding = $equivalences[$encoding_upper_helper];
        }

        // cache under the name the caller passed in, so the next lookup is a direct hit
        $STATIC_NORMALIZE_ENCODING_CACHE[$encoding_original] = $encoding;

        return $encoding;
    }
4168
4169
    /**
     * Standardize line ending to unix-like.
     *
     * Every "\r\n" pair and every remaining lone "\r" becomes a single "\n".
     *
     * @param string $str
     *
     * @return string
     */
    public static function normalize_line_ending(string $str): string
    {
        // "strtr" tries the longest key first, so "\r\n" is consumed as one unit
        $line_ending_map = [
            "\r\n" => "\n",
            "\r"   => "\n",
        ];

        return \strtr($str, $line_ending_map);
    }
4180
4181
    /**
     * Normalize some MS Word special characters.
     *
     * This is a thin wrapper that delegates to "ASCII::normalize_msword()".
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @return string
     */
    public static function normalize_msword(string $str): string
    {
        $normalized = ASCII::normalize_msword($str);

        return $normalized;
    }
4192
4193
    /**
     * Normalize the whitespace.
     *
     * This is a thin wrapper that delegates to "ASCII::normalize_whitespace()".
     *
     * @param string $str                        <p>The string to be normalized.</p>
     * @param bool   $keep_non_breaking_space    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keep_bidi_unicode_controls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                           bidirectional text chars.</p>
     *
     * @return string
     */
    public static function normalize_whitespace(
        string $str,
        bool $keep_non_breaking_space = false,
        bool $keep_bidi_unicode_controls = false
    ): string {
        $normalized = ASCII::normalize_whitespace($str, $keep_non_breaking_space, $keep_bidi_unicode_controls);

        return $normalized;
    }
4214
4215
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * Lookup order: static result cache, the "ord" data table, "IntlChar::ord()" (if
     * supported) and finally a manual decode of up to four bytes. Every result is cached
     * under the key "character + encoding".
     *
     * @param string $chr      <p>The character of which to calculate code point.<p/>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             Unicode code point of the given character,<br>
     *             0 if the manual fallback has no bytes to decode
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        static $CHAR_CACHE = [];

        // init
        $chr = (string) $chr;

        // the two most common names are used as-is, everything else is normalized first
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $cache_key = $chr . $encoding;
        if (isset($CHAR_CACHE[$cache_key]) === true) {
            return $CHAR_CACHE[$cache_key];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        // lazy-load the lookup table
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            return $CHAR_CACHE[$cache_key] = self::$ORD[$chr];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_code_point = \IntlChar::ord($chr);
            if ($intl_code_point) {
                return $CHAR_CACHE[$cache_key] = $intl_code_point;
            }
        }

        //
        // fallback via vanilla php
        //

        // "unpack" with "C*" yields a 1-based list of the unsigned byte values
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        /** @noinspection OffsetOperationsInspection */
        $lead_byte = $bytes ? $bytes[1] : 0;

        /** @noinspection OffsetOperationsInspection */
        if ($lead_byte >= 0xF0 && isset($bytes[4])) {
            // four-byte sequence
            /** @noinspection UnnecessaryCastingInspection */
            $code_point = (int) ((($lead_byte - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        } elseif ($lead_byte >= 0xE0 && isset($bytes[3])) {
            // three-byte sequence
            /** @noinspection UnnecessaryCastingInspection */
            $code_point = (int) ((($lead_byte - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        } elseif ($lead_byte >= 0xC0 && isset($bytes[2])) {
            // two-byte sequence
            /** @noinspection UnnecessaryCastingInspection */
            $code_point = (int) ((($lead_byte - 0xC0) << 6) + $bytes[2] - 0x80);
        } else {
            // single byte (or nothing to decode)
            $code_point = $lead_byte;
        }

        return $CHAR_CACHE[$cache_key] = $code_point;
    }
4300
4301
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
     *          if the second parameter is not set!
     *
     * Uses "mb_parse_str()" when mbstring is flagged as supported, the native "parse_str()" otherwise.
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $result     <p>The result will be returned into this reference parameter.</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return bool
     *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result
     */
    public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
    {
        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        // without mbstring: native parser, success is judged by the result only
        if (self::$SUPPORT['mbstring'] !== true) {
            /** @noinspection PhpVoidFunctionResultUsedInspection */
            \parse_str($str, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($str, $result);

        return $parsed !== false && $result !== [];
    }
4333
4334
    /**
     * Checks if \u modifier is available that enables Unicode support in PCRE.
     *
     * The check runs an empty pattern with the "u" modifier against an empty string.
     *
     * @return bool
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     */
    public static function pcre_utf8_support(): bool
    {
        // errors are silenced on purpose; a failing match simply results in "false"
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $match_result = @\preg_match('//u', '');

        return (bool) $match_result;
    }
4346
4347
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * The boundaries are interpreted as decimal code points, hexadecimal code points or
     * as characters (converted via "UTF8::ord()"), depending on their content and $use_ctype.
     * An empty array is returned if a boundary is empty or resolves to code point 0.
     *
     * @param mixed     $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param mixed     $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     * @param bool      $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple "is_numeric"</p>
     * @param string    $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param float|int $step      [optional] <p>
     *                             If a step value is given, it will be used as the
     *                             increment between elements in the sequence. step
     *                             should be given as a positive number. If not specified,
     *                             step will default to 1.
     *                             </p>
     *
     * @throws \InvalidArgumentException if $step is not numeric or not positive
     * @throws \RuntimeException         if $use_ctype is requested but ctype is flagged as unavailable
     *
     * @return string[]
     */
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            // $step is untyped, so callers can still pass a non-numeric value
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        $is_digit = false;
        $is_xdigit = false;

        // The ctype functions treat integer arguments as ASCII codes (deprecated as of PHP 8.1),
        // so both boundaries are cast to string for the digit and the hex-digit check alike.
        /** @noinspection PhpComposerExtensionStubsInspection */
        if ($use_ctype && \ctype_digit((string) $var1) && \ctype_digit((string) $var2)) {
            $is_digit = true;
            $start = (int) $var1;
        } /** @noinspection PhpComposerExtensionStubsInspection */ elseif ($use_ctype && \ctype_xdigit((string) $var1) && \ctype_xdigit((string) $var2)) {
            $is_xdigit = true;
            $start = (int) self::hex_to_int((string) $var1);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            $start = self::ord($var1);
        }

        if (!$start) {
            return [];
        }

        // the end boundary is interpreted the same way as the start boundary
        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int((string) $var2);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord($var2);
        }

        if (!$end) {
            return [];
        }

        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
4429
4430
    /**
4431
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
4432
     *
4433
     * e.g:
4434
     * 'test+test'                     => 'test+test'
4435
     * 'D&#252;sseldorf'               => 'Düsseldorf'
4436
     * 'D%FCsseldorf'                  => 'Düsseldorf'
4437
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
4438
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
4439
     * 'Düsseldorf'                   => 'Düsseldorf'
4440
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
4441
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
4442
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
4443
     *
4444
     * @param string $str          <p>The input string.</p>
4445
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
4446
     *
4447
     * @return string
4448
     */
4449 6
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // Without any of the markers "&", "%", "+" or "\u" there is nothing to decode,
        // so only the simple UTF-8 fix is applied.
        $has_marker = false;
        foreach (['&', '%', '+', '\u'] as $marker) {
            if (\strpos($str, $marker) !== false) {
                $has_marker = true;

                break;
            }
        }

        if ($has_marker === false) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // Decode once, or - with $multi_decode - until a pass no longer changes the string.
        do {
            $previous = $str;

            $as_utf8 = self::to_utf8($str);
            $without_entities = self::html_entity_decode($as_utf8, \ENT_QUOTES | \ENT_HTML5);

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::fix_simple_utf8(\rawurldecode($without_entities));
        } while ($multi_decode === true && $previous !== $str);

        return $str;
    }
4487
4488
    /**
4489
     * Replaces all occurrences of $pattern in $str by $replacement.
4490
     *
4491
     * @param string $str         <p>The input string.</p>
4492
     * @param string $pattern     <p>The regular expression pattern.</p>
4493
     * @param string $replacement <p>The string to replace with.</p>
4494
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
4495
     * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
4496
     *
4497
     * @return string
4498
     */
4499 18
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // The option string "msr" is reduced to "ms" before it is appended to the pattern.
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback: an empty (or otherwise falsy) delimiter is replaced by "/"
        $boundary = $delimiter ?: '/';

        // The "u" modifier is always added, so the pattern is evaluated as UTF-8.
        $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
4521
4522
    /**
4523
     * alias for "UTF8::remove_bom()"
4524
     *
4525
     * @param string $str
4526
     *
4527
     * @return string
4528
     *
4529
     * @see UTF8::remove_bom()
4530
     * @deprecated <p>please use "UTF8::remove_bom()"</p>
4531
     */
4532
    public static function removeBOM(string $str): string
    {
        // Deprecated alias: the call is forwarded unchanged to remove_bom().
        $without_bom = self::remove_bom($str);

        return $without_bom;
    }
4536
4537
    /**
4538
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
4539
     *
4540
     * @param string $str <p>The input string.</p>
4541
     *
4542
     * @return string string without UTF-BOM
4543
     */
4544 55
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remaining_bytes = \strlen($str);
        foreach (self::$BOM as $bom => $bom_size) {
            // Only a BOM at the very beginning of the string is removed.
            if (\strpos($str, $bom, 0) !== 0) {
                continue;
            }

            /** @var string|false $stripped - needed for PhpStan (stubs error) */
            $stripped = \substr($str, $bom_size, $remaining_bytes);
            if ($stripped === false) {
                return '';
            }

            // Keep the byte counter in sync, the loop goes on with the shortened string.
            $remaining_bytes -= (int) $bom_size;
            $str = (string) $stripped;
        }

        return $str;
    }
4567
4568
    /**
4569
     * Removes duplicate occurrences of a string in another string.
4570
     *
4571
     * @param string          $str  <p>The base string.</p>
4572
     * @param string|string[] $what <p>String to search for in the base string.</p>
4573
     *
4574
     * @return string the result string with removed duplicates
4575
     */
4576 2
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        // A single needle is handled like a list with one element.
        if (\is_string($what) === true) {
            $what = [$what];
        }

        // $what is untyped: anything that is neither a string nor an array leaves $str untouched.
        if (\is_array($what) === true) {
            /** @noinspection ForeachSourceInspection */
            foreach ($what as $item) {
                $item = (string) $item;
                if ($item === '') {
                    // an empty needle cannot occur "twice in a row"
                    continue;
                }

                // The replacement is the back-reference "$1" (= one captured, literal occurrence)
                // instead of $item itself: needles such as "$0" or "\1" would otherwise be
                // interpreted as references by preg_replace() and nothing would be collapsed.
                $str = (string) \preg_replace('/(' . \preg_quote($item, '/') . ')+/u', '$1', $str);
            }
        }

        return $str;
    }
4591
4592
    /**
4593
     * Remove html via "strip_tags()" from the string.
4594
     *
4595
     * @param string $str
4596
     * @param string $allowable_tags [optional] <p>You can use the optional second parameter to specify tags which should
4597
     *                               not be stripped. Default: '' (empty string, i.e. strip every tag)
4598
     *                               </p>
4599
     *
4600
     * @return string
4601
     */
4602 6
    public static function remove_html(string $str, string $allowable_tags = ''): string
    {
        // Thin wrapper around strip_tags(); tags listed in $allowable_tags are kept.
        $stripped = \strip_tags($str, $allowable_tags);

        return $stripped;
    }
4606
4607
    /**
4608
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
4609
     *
4610
     * @param string $str
4611
     * @param string $replacement [optional] <p>Default is an empty string.</p>
4612
     *
4613
     * @return string
4614
     */
4615 6
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // Alternatives, in this order: "\r\n" (replaced once, as a unit), "\r", "\n" and
        // "<br ...>" tags (case-insensitive, ungreedy). The former pattern started with a
        // stray "/", so a slash in front of a Windows line break was swallowed and "\r\n"
        // was replaced twice.
        return (string) \preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
    }
4619
4620
    /**
4621
     * Remove invisible characters from a string.
4622
     *
4623
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
4624
     *
4625
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
4626
     *
4627
     * @param string $str
4628
     * @param bool   $url_encoded
4629
     * @param string $replacement
4630
     *
4631
     * @return string
4632
     */
4633 89
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = true,
        string $replacement = ''
    ): string {
        // The work is delegated to the ASCII helper class, all arguments are passed through as given.
        $cleaned = ASCII::remove_invisible_characters($str, $url_encoded, $replacement);

        return $cleaned;
    }
4644
4645
    /**
4646
     * Returns a new string with the prefix $substring removed, if present.
4647
     *
4648
     * @param string $str
4649
     * @param string $substring <p>The prefix to remove.</p>
4650
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
4651
     *
4652
     * @return string string without the prefix $substring
4653
     */
4654 12
    public static function remove_left(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Compare with '' explicitly: a truthiness check would treat the prefix "0" as empty.
        // strncmp() only looks at the first bytes instead of scanning the whole string.
        if ($substring === '' || \strncmp($str, $substring, \strlen($substring)) !== 0) {
            return $str;
        }

        // fast path: use the mb_* functions directly for UTF-8
        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // cut off as many characters (in the given encoding) as the prefix is long
        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
4679
4680
    /**
4681
     * Returns a new string with the suffix $substring removed, if present.
4682
     *
4683
     * @param string $str
4684
     * @param string $substring <p>The suffix to remove.</p>
4685
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
4686
     *
4687
     * @return string string having a $str without the suffix $substring
4688
     */
4689 12
    public static function remove_right(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Compare with '' explicitly: a truthiness check would treat the suffix "0" as empty.
        // The suffix test itself is byte-wise.
        if ($substring === '' || \substr($str, -\strlen($substring)) !== $substring) {
            return $str;
        }

        // fast path: use the mb_* functions directly for UTF-8
        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // keep everything except the last characters (in the given encoding) that make up the suffix
        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
4715
4716
    /**
4717
     * Replaces all occurrences of $search in $str by $replacement.
4718
     *
4719
     * @param string $str            <p>The input string.</p>
4720
     * @param string $search         <p>The needle to search for.</p>
4721
     * @param string $replacement    <p>The string to replace with.</p>
4722
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
4723
     *
4724
     * @return string string after the replacements
4725
     */
4726 29
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $case_sensitive = true
    ): string {
        // the case-insensitive variant is handled by the str_ireplace() method of this class
        if (!$case_sensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        // case-sensitive: the native (byte-wise) str_replace() is used
        return \str_replace($search, $replacement, $str);
    }
4738
4739
    /**
4740
     * Replaces all occurrences of $search in $str by $replacement.
4741
     *
4742
     * @param string       $str            <p>The input string.</p>
4743
     * @param array        $search         <p>The elements to search for.</p>
4744
     * @param array|string $replacement    <p>The string to replace with.</p>
4745
     * @param bool         $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
4746
     *
4747
     * @return string string after the replacements
4748
     */
4749 30
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $case_sensitive = true
    ): string {
        // case-sensitive => native str_replace(), otherwise the str_ireplace() method of this class
        return $case_sensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4761
4762
    /**
4763
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
4764
     *
4765
     * @param string $str                        <p>The input string</p>
4766
     * @param string $replacement_char           <p>The replacement character.</p>
4767
     * @param bool   $process_invalid_utf8_chars <p>Convert invalid UTF-8 chars </p>
4768
     *
4769
     * @return string
4770
     */
4771 35
    public static function replace_diamond_question_mark(
        string $str,
        string $replacement_char = '',
        bool $process_invalid_utf8_chars = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($process_invalid_utf8_chars === true) {
            // mb_substitute_character() accepts only "none", "long", "entity" or a code point (int);
            // an arbitrary replacement string is rejected (ValueError on PHP 8) or ignored.
            // So invalid sequences are either dropped ("none") or first turned into U+FFFD,
            // which the str_replace() at the end swaps for the requested replacement.
            $substitute = $replacement_char === '' ? 'none' : 0xFFFD;

            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            $save = \mb_substitute_character();
            \mb_substitute_character($substitute);

            try {
                // the polyfill maybe return false, so cast to string
                $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            } finally {
                // always restore the previous (global) substitute character
                \mb_substitute_character($save);
            }
        }

        // "\xEF\xBF\xBD" is the UTF-8 byte sequence of U+FFFD (the "diamond question mark")
        return \str_replace("\xEF\xBF\xBD", $replacement_char, $str);
    }
4811
4812
    /**
4813
     * Strip whitespace or other characters from the end of a UTF-8 string.
4814
     *
4815
     * @param string      $str   <p>The string to be trimmed.</p>
4816
     * @param string|null $chars <p>Optional characters to be stripped.</p>
4817
     *
4818
     * @return string the string with unwanted characters stripped from the right
4819
     */
4820 20
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Compare with null / '' explicitly: a truthiness check would ignore the char list "0"
        // and silently fall back to whitespace trimming.
        if ($chars !== null && $chars !== '') {
            $chars = \preg_quote($chars, '/');
            // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2
            $pattern = "[{$chars}]+$";
        } else {
            $pattern = '[\\s]+$';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // without "mbstring" the PCRE based helper is used
        return self::regex_replace($str, $pattern, '', '', '/');
    }
4840
4841
    /**
4842
     * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
4843
     *
4844
     * @psalm-suppress MissingReturnType
4845
     */
4846 2
    public static function showSupport()
    {
        // Collect one "key - value" line per entry of the support table, then print everything at once.
        $lines = '';
        foreach (self::$SUPPORT as $key => &$value) {
            $lines .= $key . ' - ' . \print_r($value, true) . "\n<br>";
        }
        unset($value);

        echo '<pre>' . $lines . '</pre>';
    }
4855
4856
    /**
4857
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
4858
     *
4859
     * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
4860
     * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
4861
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
4862
     *
4863
     * @return string the HTML numbered entity
4864
     */
4865 2
    public static function single_chr_html_encode(
        string $char,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($char === '') {
            return '';
        }

        // ASCII input is returned untouched, but only when the caller asked for it.
        $return_as_is = $keep_ascii_chars === true && ASCII::is_ascii($char) === true;
        if ($return_as_is) {
            return $char;
        }

        // build the numbered entity from the value returned by ord()
        $code_point = self::ord($char, $encoding);

        return '&#' . $code_point . ';';
    }
4884
4885
    /**
4886
     * @param string $str
4887
     * @param int    $tab_length
4888
     *
4889
     * @return string
4890
     */
4891 5
    public static function spaces_to_tabs(string $str, int $tab_length = 4): string
    {
        // every run of $tab_length spaces is replaced by one tab character
        $spaces = \str_repeat(' ', $tab_length);

        return \str_replace($spaces, "\t", $str);
    }
4903
4904
    /**
4905
     * alias for "UTF8::str_split()"
4906
     *
4907
     * @param string|string[] $str
4908
     * @param int             $length
4909
     * @param bool            $clean_utf8
4910
     *
4911
     * @return string[]
4912
     *
4913
     * @see UTF8::str_split()
4914
     * @deprecated <p>please use "UTF8::str_split()"</p>
4915
     */
4916 9
    public static function split(
        $str,
        int $length = 1,
        bool $clean_utf8 = false
    ): array {
        // Deprecated alias: all arguments are forwarded unchanged to str_split().
        $parts = self::str_split($str, $length, $clean_utf8);

        return $parts;
    }
4923
4924
    /**
4925
     * alias for "UTF8::str_starts_with()"
4926
     *
4927
     * @param string $haystack
4928
     * @param string $needle
4929
     *
4930
     * @return bool
4931
     *
4932
     * @see UTF8::str_starts_with()
4933
     * @deprecated <p>please use "UTF8::str_starts_with()"</p>
4934
     */
4935
    public static function str_begins(string $haystack, string $needle): bool
    {
        // Deprecated alias: the call is forwarded unchanged to str_starts_with().
        $starts_with = self::str_starts_with($haystack, $needle);

        return $starts_with;
    }
4939
4940
    /**
4941
     * Returns a camelCase version of the string. Trims surrounding spaces,
4942
     * capitalizes letters following digits, spaces, dashes and underscores,
4943
     * and removes spaces, dashes, as well as underscores.
4944
     *
4945
     * @param string      $str                           <p>The input string.</p>
4946
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
4947
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
4948
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
4949
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
4950
     *
4951
     * @return string
4952
     */
4953 32
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // lowercase the first character of the trimmed input, then drop leading dashes / underscores
        $str = self::lcfirst(\trim($str), $encoding, false, $lang, $try_to_keep_the_string_length);
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // the plain mb_* functions are only used when no language specific handling was requested
        $use_mb_functions = $lang === null && $try_to_keep_the_string_length === false;

        // upper-casing step shared by both replacements below
        $to_upper = static function (string $text, bool $clean) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
            if ($use_mb_functions !== true) {
                return self::strtoupper($text, $encoding, $clean, $lang, $try_to_keep_the_string_length);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($text);
            }

            return \mb_strtoupper($text, $encoding);
        };

        // 1.) remove dashes, underscores and whitespace, uppercase the character that follows them
        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($to_upper): string {
                if (!isset($match[1])) {
                    return '';
                }

                return $to_upper($match[1], false);
            },
            $str
        );

        // 2.) uppercase every run of digits together with the character that follows it
        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($to_upper, $clean_utf8): string {
                return $to_upper($match[0], $clean_utf8);
            },
            $str
        );
    }
5025
5026
    /**
5027
     * Returns the string with the first letter of each word capitalized,
5028
     * except for when the word is a name which shouldn't be capitalized.
5029
     *
5030
     * @param string $str
5031
     *
5032
     * @return string string with $str capitalized
5033
     */
5034 1
    public static function str_capitalize_name(string $str): string
    {
        // The helper is applied twice: first with " " and then with "-" as delimiter,
        // always on the whitespace-collapsed input.
        $collapsed = self::collapse_whitespace($str);
        $space_separated = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($space_separated, '-');
    }
5044
5045
    /**
5046
     * Returns true if the string contains $needle, false otherwise. By default
5047
     * the comparison is case-sensitive, but can be made insensitive by setting
5048
     * $case_sensitive to false.
5049
     *
5050
     * @param string $haystack       <p>The input string.</p>
5051
     * @param string $needle         <p>Substring to look for.</p>
5052
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
5053
     *
5054
     * @return bool whether or not $haystack contains $needle
5055
     */
5056 21
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $case_sensitive = true
    ): bool {
        // byte-wise strpos() for the case-sensitive search, mb_stripos() otherwise
        $position = $case_sensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        return $position !== false;
    }
5067
5068
    /**
5069
     * Returns true if the string contains all $needles, false otherwise. By
5070
     * default the comparison is case-sensitive, but can be made insensitive by
5071
     * setting $case_sensitive to false.
5072
     *
5073
     * @param string $haystack       <p>The input string.</p>
5074
     * @param array  $needles        <p>SubStrings to look for.</p>
5075
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
5076
     *
5077
     * @return bool whether or not $haystack contains all $needles
5078
     */
5079 44
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // Needles are compared as strings; only a really empty needle counts as "not found"
            // (a truthiness check would also reject the needle "0").
            $needle = (string) $needle;
            if ($needle === '') {
                return false;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            // Stop at the first missing needle, but keep checking the remaining ones otherwise
            // (the former loop returned after the first needle, whatever the result was).
            if ($position === false) {
                return false;
            }
        }

        return true;
    }
5103
5104
    /**
5105
     * Returns true if the string contains any $needles, false otherwise. By
5106
     * default the comparison is case-sensitive, but can be made insensitive by
5107
     * setting $case_sensitive to false.
5108
     *
5109
     * @param string $haystack       <p>The input string.</p>
5110
     * @param array  $needles        <p>SubStrings to look for.</p>
5111
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
5112
     *
5113
     * @return bool
5114
     *              Whether or not $haystack contains any of the $needles
5115
     */
5116 46
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // Needles are compared as strings; only really empty needles are skipped
            // (a truthiness check would also skip the needle "0").
            $needle = (string) $needle;
            if ($needle === '') {
                continue;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            // the first hit is enough
            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
5146
5147
    /**
5148
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
5149
     * inserted before uppercase characters (with the exception of the first
5150
     * character of the string), and in place of spaces as well as underscores.
5151
     *
5152
     * @param string $str      <p>The input string.</p>
5153
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5154
     *
5155
     * @return string
5156
     */
5157 19
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        // str_delimit() with "-" as the fixed delimiter
        $dasherized = self::str_delimit($str, '-', $encoding);

        return $dasherized;
    }
5161
5162
    /**
5163
     * Returns a lowercase and trimmed string separated by the given delimiter.
5164
     * Delimiters are inserted before uppercase characters (with the exception
5165
     * of the first character of the string), and in place of spaces, dashes,
5166
     * and underscores. Alpha delimiters are not converted to lowercase.
5167
     *
5168
     * @param string      $str                           <p>The input string.</p>
5169
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
5170
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
5171
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
5172
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
5173
     *                                                   tr</p>
5174
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
5175
     *                                                   ß</p>
5176
     *
5177
     * @return string
5178
     */
5179 49
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Both regex steps use mb_ereg_replace() when "mbstring" is available and preg_replace() otherwise.
        $has_mbstring = self::$SUPPORT['mbstring'] === true;

        // 1.) trim the input and put a dash in front of every uppercase letter
        //     that does not sit at a word boundary ("\B")
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', \trim($str));
        } else {
            $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', \trim($str));
        }

        // 2.) lowercase: plain mb_strtolower() for the default case (UTF-8, no language,
        //     no length preservation), the strtolower() method of this class otherwise
        $plain_utf8_case = $lang === null
                           && $try_to_keep_the_string_length === false
                           && $encoding === 'UTF-8';
        if ($plain_utf8_case) {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        // 3.) every run of dashes, underscores and whitespace becomes one delimiter
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }
5213
5214
    /**
5215
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
5216
     *
5217
     * @param string $str <p>The input string.</p>
5218
     *
5219
     * @return false|string
5220
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
5221
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
5222
     */
5223 30
    public static function str_detect_encoding($str)
    {
        // work on a string version of whatever was passed in
        $input = (string) $str;

        //
        // 1.) binary-looking input: only UTF-32 / UTF-16 are accepted here
        //     (result 1 is reported as little-endian, result 2 as big-endian)
        //
        if (self::is_binary($input, true) === true) {
            $utf32_result = self::is_utf32($input, false);
            if ($utf32_result === 1 || $utf32_result === 2) {
                return $utf32_result === 1 ? 'UTF-32LE' : 'UTF-32BE';
            }

            $utf16_result = self::is_utf16($input, false);
            if ($utf16_result === 1 || $utf16_result === 2) {
                return $utf16_result === 1 ? 'UTF-16LE' : 'UTF-16BE';
            }

            // binary, but neither "UTF-16" nor "UTF-32"
            return false;
        }

        //
        // 2.) pure ASCII
        //
        if (ASCII::is_ascii($input) === true) {
            return 'ASCII';
        }

        //
        // 3.) UTF-8
        //
        if (self::is_utf8_string($input) === true) {
            return 'UTF-8';
        }

        //
        // 4.) ask "mb_detect_encoding()" (strict mode) with a fixed candidate order
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"
        if (self::$SUPPORT['mbstring'] === true) {
            $mb_candidates = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($input, $mb_candidates, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) last resort: the first known encoding that survives an "iconv()" round trip unchanged
        //
        if (self::$ENCODINGS === null) {
            // lazily load the encoding list on first use
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $round_trip = (string) @\iconv($candidate, $candidate . '//IGNORE', $input);
            if ($round_trip === $input) {
                return $candidate;
            }
        }

        return false;
    }
5335
5336
    /**
5337
     * alias for "UTF8::str_ends_with()"
5338
     *
5339
     * @param string $haystack
5340
     * @param string $needle
5341
     *
5342
     * @return bool
5343
     *
5344
     * @see UTF8::str_ends_with()
5345
     * @deprecated <p>please use "UTF8::str_ends_with()"</p>
5346
     */
5347
    public static function str_ends(string $haystack, string $needle): bool
    {
        // Deprecated alias: forwards both arguments unchanged to str_ends_with().
        $ends_with_needle = self::str_ends_with($haystack, $needle);

        return $ends_with_needle;
    }
5351
5352
    /**
5353
     * Check if the string ends with the given substring.
5354
     *
5355
     * @param string $haystack <p>The string to search in.</p>
5356
     * @param string $needle   <p>The substring to search for.</p>
5357
     *
5358
     * @return bool
5359
     */
5360 9
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        $needle_bytes = \strlen($needle);

        // an empty needle is a suffix of everything (even of an empty haystack)
        if ($needle_bytes === 0) {
            return true;
        }

        // a non-empty needle can never be found in an empty haystack
        if ($haystack === '') {
            return false;
        }

        // byte-wise comparison of the haystack tail against the needle
        $haystack_tail = \substr($haystack, -$needle_bytes);

        return $haystack_tail === $needle;
    }
5372
5373
    /**
5374
     * Returns true if the string ends with any of $substrings, false otherwise.
5375
     *
5376
     * - case-sensitive
5377
     *
5378
     * @param string   $str        <p>The input string.</p>
5379
     * @param string[] $substrings <p>Substrings to look for.</p>
5380
     *
5381
     * @return bool whether or not $str ends with $substring
5382
     */
5383 7
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        // Iterate by value: nothing in the loop writes to the elements, so the
        // former by-reference iteration only left a dangling reference behind.
        // An empty list simply skips the loop and yields false.
        foreach ($substrings as $substring) {
            // An empty substring is a suffix of every string (same rule as
            // str_ends_with()). It needs its own check because
            // "substr($str, -0)" returns the whole string instead of ''.
            if ($substring === '') {
                return true;
            }

            // case-sensitive, byte-wise comparison of the tail of $str
            if (\substr($str, -\strlen($substring)) === $substring) {
                return true;
            }
        }

        return false;
    }
5397
5398
    /**
5399
     * Ensures that the string begins with $substring. If it doesn't, it's
5400
     * prepended.
5401
     *
5402
     * @param string $str       <p>The input string.</p>
5403
     * @param string $substring <p>The substring to add if not present.</p>
5404
     *
5405
     * @return string
5406
     */
5407 10
    public static function str_ensure_left(string $str, string $substring): string
    {
        // Only a non-empty substring found at byte offset 0 counts as "already there".
        $already_prefixed = $substring !== '' && \strpos($str, $substring) === 0;

        // Otherwise prepend it (prepending an empty substring changes nothing).
        return $already_prefixed ? $str : $substring . $str;
    }
5419
5420
    /**
5421
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
5422
     *
5423
     * @param string $str       <p>The input string.</p>
5424
     * @param string $substring <p>The substring to add if not present.</p>
5425
     *
5426
     * @return string
5427
     */
5428 10
    public static function str_ensure_right(string $str, string $substring): string
    {
        // The string is left alone only when both parts are non-empty and the
        // (byte-wise) tail of $str already equals $substring.
        $already_suffixed = $str !== ''
                            && $substring !== ''
                            && \substr($str, -\strlen($substring)) === $substring;

        // In every other case the substring is appended (a no-op when it is empty).
        return $already_suffixed ? $str : $str . $substring;
    }
5442
5443
    /**
5444
     * Capitalizes the first word of the string, replaces underscores with
5445
     * spaces, and strips '_id'.
5446
     *
5447
     * @param string $str
5448
     *
5449
     * @return string
5450
     */
5451 3
    public static function str_humanize($str): string
    {
        // Replacement pairs, applied in this order: drop every "_id", then turn
        // each remaining underscore into a space.
        $search = ['_id', '_'];
        $replace = ['', ' '];

        $humanized = \trim(\str_replace($search, $replace, $str));

        // upper-case the first character of the trimmed result
        return self::ucfirst($humanized);
    }
5467
5468
    /**
5469
     * alias for "UTF8::str_istarts_with()"
5470
     *
5471
     * @param string $haystack
5472
     * @param string $needle
5473
     *
5474
     * @return bool
5475
     *
5476
     * @see UTF8::str_istarts_with()
5477
     * @deprecated <p>please use "UTF8::str_istarts_with()"</p>
5478
     */
5479
    public static function str_ibegins(string $haystack, string $needle): bool
    {
        // Deprecated alias: forwards both arguments unchanged to str_istarts_with().
        $starts_with_needle = self::str_istarts_with($haystack, $needle);

        return $starts_with_needle;
    }
5483
5484
    /**
5485
     * alias for "UTF8::str_iends_with()"
5486
     *
5487
     * @param string $haystack
5488
     * @param string $needle
5489
     *
5490
     * @return bool
5491
     *
5492
     * @see UTF8::str_iends_with()
5493
     * @deprecated <p>please use "UTF8::str_iends_with()"</p>
5494
     */
5495
    public static function str_iends(string $haystack, string $needle): bool
    {
        // Deprecated alias: forwards both arguments unchanged to str_iends_with().
        $ends_with_needle = self::str_iends_with($haystack, $needle);

        return $ends_with_needle;
    }
5499
5500
    /**
5501
     * Check if the string ends with the given substring, case-insensitive.
5502
     *
5503
     * @param string $haystack <p>The string to search in.</p>
5504
     * @param string $needle   <p>The substring to search for.</p>
5505
     *
5506
     * @return bool
5507
     */
5508 12
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        // an empty needle is a suffix of everything
        if ($needle === '') {
            return true;
        }

        // a non-empty needle can never be found in an empty haystack
        if ($haystack === '') {
            return false;
        }

        // Cut the tail by characters, not by bytes: the upper- and lower-case
        // forms of a letter may have different byte lengths in UTF-8, so a
        // byte-based cut could slice through a multi-byte character and make
        // the comparison fail for a string that does end with the needle.
        $needle_length = (int) \mb_strlen($needle, 'UTF-8');
        $haystack_tail = (string) \mb_substr($haystack, -$needle_length, null, 'UTF-8');

        return self::strcasecmp($haystack_tail, $needle) === 0;
    }
5520
5521
    /**
5522
     * Returns true if the string ends with any of $substrings, false otherwise.
5523
     *
5524
     * - case-insensitive
5525
     *
5526
     * @param string   $str        <p>The input string.</p>
5527
     * @param string[] $substrings <p>Substrings to look for.</p>
5528
     *
5529
     * @return bool
5530
     *              <p>Whether or not $str ends with $substring.</p>
5531
     */
5532 4
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // Iterate by value: the elements are only read, so the former
        // by-reference iteration merely left a dangling reference behind.
        // An empty list skips the loop and yields false.
        foreach ($substrings as $substring) {
            // case-insensitive suffix check, delegated to str_iends_with()
            if (self::str_iends_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
5546
5547
    /**
5548
     * Returns the index of the first occurrence of $needle in the string,
5549
     * and false if not found. Accepts an optional offset from which to begin
5550
     * the search.
5551
     *
5552
     * @param string $str      <p>The input string.</p>
5553
     * @param string $needle   <p>Substring to look for.</p>
5554
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
5555
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5556
     *
5557
     * @return false|int
5558
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
5559
     *
5560
     * @see UTF8::stripos()
5561
     * @deprecated <p>please use "UTF8::stripos()"</p>
5562
     */
5563
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Deprecated alias: forwards all arguments unchanged to stripos().
        $first_index = self::stripos($str, $needle, $offset, $encoding);

        return $first_index;
    }
5576
5577
    /**
5578
     * Returns the index of the last occurrence of $needle in the string,
5579
     * and false if not found. Accepts an optional offset from which to begin
5580
     * the search. Offsets may be negative to count from the last character
5581
     * in the string.
5582
     *
5583
     * @param string $str      <p>The input string.</p>
5584
     * @param string $needle   <p>Substring to look for.</p>
5585
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
5586
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5587
     *
5588
     * @return false|int
5589
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
5590
     *
5591
     * @see UTF8::strripos()
5592
     * @deprecated <p>please use "UTF8::strripos()"</p>
5593
     */
5594
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Deprecated alias: forwards all arguments unchanged to strripos().
        $last_index = self::strripos($str, $needle, $offset, $encoding);

        return $last_index;
    }
5607
5608
    /**
5609
     * Returns the index of the first occurrence of $needle in the string,
5610
     * and false if not found. Accepts an optional offset from which to begin
5611
     * the search.
5612
     *
5613
     * @param string $str      <p>The input string.</p>
5614
     * @param string $needle   <p>Substring to look for.</p>
5615
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
5616
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5617
     *
5618
     * @return false|int
5619
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
5620
     *
5621
     * @see UTF8::strpos()
5622
     * @deprecated <p>please use "UTF8::strpos()"</p>
5623
     */
5624 10
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Deprecated alias: forwards all arguments unchanged to strpos().
        $first_index = self::strpos($str, $needle, $offset, $encoding);

        return $first_index;
    }
5637
5638
    /**
5639
     * Returns the index of the last occurrence of $needle in the string,
5640
     * and false if not found. Accepts an optional offset from which to begin
5641
     * the search. Offsets may be negative to count from the last character
5642
     * in the string.
5643
     *
5644
     * @param string $str      <p>The input string.</p>
5645
     * @param string $needle   <p>Substring to look for.</p>
5646
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
5647
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5648
     *
5649
     * @return false|int
5650
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
5651
     *
5652
     * @see UTF8::strrpos()
5653
     * @deprecated <p>please use "UTF8::strrpos()"</p>
5654
     */
5655 10
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Deprecated alias: forwards all arguments unchanged to strrpos().
        $last_index = self::strrpos($str, $needle, $offset, $encoding);

        return $last_index;
    }
5668
5669
    /**
5670
     * Inserts $substring into the string at the $index provided.
5671
     *
5672
     * @param string $str       <p>The input string.</p>
5673
     * @param string $substring <p>String to be inserted.</p>
5674
     * @param int    $index     <p>The index at which to insert the substring.</p>
5675
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
5676
     *
5677
     * @return string
5678
     */
5679 8
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding === 'UTF-8') {
            // fast path: call the "mb_*" functions directly
            $length = (int) \mb_strlen($str);

            // an index behind the end of the string leaves the input untouched
            if ($index > $length) {
                return $str;
            }

            $head = (string) \mb_substr($str, 0, $index);
            $tail = (string) \mb_substr($str, $index, $length);

            return $head . $substring . $tail;
        }

        // any other encoding: normalize its name, then use the wrappers of this class
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $length = (int) self::strlen($str, $encoding);
        if ($index > $length) {
            return $str;
        }

        $head = (string) self::substr($str, 0, $index, $encoding);
        $tail = (string) self::substr($str, $index, $length, $encoding);

        return $head . $substring . $tail;
    }
5708
5709
    /**
5710
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
5711
     *
5712
     * @see http://php.net/manual/en/function.str-ireplace.php
5713
     *
5714
     * @param mixed $search  <p>
5715
     *                       Every replacement with search array is
5716
     *                       performed on the result of previous replacement.
5717
     *                       </p>
5718
     * @param mixed $replace <p>
5719
     *                       </p>
5720
     * @param mixed $subject <p>
5721
     *                       If subject is an array, then the search and
5722
     *                       replace is performed with every entry of
5723
     *                       subject, and the return value is an array as
5724
     *                       well.
5725
     *                       </p>
5726
     * @param int   $count   [optional] <p>
5727
     *                       The number of matched and replaced needles will
5728
     *                       be returned in count which is passed by
5729
     *                       reference.
5730
     *                       </p>
5731
     *
5732
     * @return mixed a string or an array of replacements
5733
     */
5734 29
    public static function str_ireplace($search, $replace, $subject, &$count = null)
    {
        // Turn every search term into a case-insensitive, UTF-8 aware regex.
        // A fresh array is built (keys and order kept) instead of rewriting
        // the terms through a by-reference loop.
        $patterns = [];
        foreach ((array) $search as $key => $needle) {
            $needle = (string) $needle;

            // An empty search term must not match anything: this pattern can
            // never match because of the look-behind directly after "^".
            $patterns[$key] = $needle === ''
                ? '/^(?<=.)$/'
                : '/' . \preg_quote($needle, '/') . '/ui';
        }

        // The replacement is meant literally (as in "str_ireplace()"), but
        // "preg_replace()" would read "$n" / "\n" in it as back-references.
        // Escape backslashes first, then dollar signs. "str_replace()" handles
        // a string as well as an array of replacements; other types are passed
        // through untouched.
        if (\is_string($replace) || \is_array($replace)) {
            $replace = \str_replace(['\\', '$'], ['\\\\', '\\$'], $replace);
        }

        $replaced = 0;
        $subject = \preg_replace($patterns, $replace, $subject, -1, $replaced);

        // hand the number of replacements back through the reference parameter
        $count = $replaced;

        return $subject;
    }
5753
5754
    /**
5755
     * Replaces $search from the beginning of string with $replacement.
5756
     *
5757
     * @param string $str         <p>The input string.</p>
5758
     * @param string $search      <p>The string to search for.</p>
5759
     * @param string $replacement <p>The replacement.</p>
5760
     *
5761
     * @return string string after the replacements
5762
     */
5763 17
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        // special cases for an empty input string
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        // an empty search string: the replacement is appended (existing behavior, kept as is)
        if ($search === '') {
            return $str . $replacement;
        }

        // Case-insensitive prefix check with the multi-byte functions, so that
        // non-ASCII letters (e.g. "Ä" / "ä") match as well, and so that the
        // prefix is cut off by characters instead of by bytes of $search.
        if (\mb_stripos($str, $search, 0, 'UTF-8') === 0) {
            $search_length = (int) \mb_strlen($search, 'UTF-8');

            return $replacement . (string) \mb_substr($str, $search_length, null, 'UTF-8');
        }

        return $str;
    }
5785
5786
    /**
5787
     * Replaces $search from the ending of string with $replacement.
5788
     *
5789
     * @param string $str         <p>The input string.</p>
5790
     * @param string $search      <p>The string to search for.</p>
5791
     * @param string $replacement <p>The replacement.</p>
5792
     *
5793
     * @return string string after the replacements
5794
     */
5795 17
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        // special cases for an empty input string
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        // an empty search string: the replacement is simply appended
        if ($search === '') {
            return $str . $replacement;
        }

        $str_length = (int) \mb_strlen($str, 'UTF-8');
        $search_length = (int) \mb_strlen($search, 'UTF-8');

        // A search string longer than the input can never match. Returning early
        // also keeps the search offset below inside the string (an offset outside
        // of the haystack is an error for the "*stripos()" functions).
        if ($search_length > $str_length) {
            return $str;
        }

        // Only a match starting exactly $search_length characters before the end
        // is an "ending" match, so the search starts right there. Multi-byte
        // functions are used so that non-ASCII letters match case-insensitively.
        $tail_start = $str_length - $search_length;
        if (\mb_stripos($str, $search, $tail_start, 'UTF-8') !== false) {
            $str = (string) \mb_substr($str, 0, $tail_start, 'UTF-8') . $replacement;
        }

        return $str;
    }
5817
5818
    /**
5819
     * Check if the string starts with the given substring, case-insensitive.
5820
     *
5821
     * @param string $haystack <p>The string to search in.</p>
5822
     * @param string $needle   <p>The substring to search for.</p>
5823
     *
5824
     * @return bool
5825
     */
5826 12
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        // an empty needle is a prefix of everything (checked before the haystack)
        if ($needle === '') {
            return true;
        }

        // a non-empty needle can never be found in an empty haystack
        if ($haystack === '') {
            return false;
        }

        // "starts with" means: the case-insensitive search finds the needle at index 0
        $first_index = self::stripos($haystack, $needle);

        return $first_index === 0;
    }
5838
5839
    /**
5840
     * Returns true if the string begins with any of $substrings, false otherwise.
5841
     *
5842
     * - case-insensitive
5843
     *
5844
     * @param string $str        <p>The input string.</p>
5845
     * @param array  $substrings <p>Substrings to look for.</p>
5846
     *
5847
     * @return bool whether or not $str starts with $substring
5848
     */
5849 4
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        // nothing to check against an empty string or an empty list
        if ($str === '' || $substrings === []) {
            return false;
        }

        // Iterate by value: the elements are only read, so the former
        // by-reference iteration merely left a dangling reference behind.
        foreach ($substrings as $substring) {
            // case-insensitive prefix check, delegated to str_istarts_with()
            if (self::str_istarts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
5867
5868
    /**
5869
     * Gets the substring after the first occurrence of a separator.
5870
     *
5871
     * @param string $str       <p>The input string.</p>
5872
     * @param string $separator <p>The string separator.</p>
5873
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
5874
     *
5875
     * @return string
5876
     */
5877 1
    public static function str_isubstr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to search for / nothing to search in
        if ($separator === '' || $str === '') {
            return '';
        }

        // Case-insensitive search for the first separator. The encoding is
        // passed on so that the offset is computed in the same encoding that is
        // used for cutting the string below.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // fast path: everything behind the separator via "mb_substr()"
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
5905
5906
    /**
5907
     * Gets the substring after the last occurrence of a separator.
5908
     *
5909
     * @param string $str       <p>The input string.</p>
5910
     * @param string $separator <p>The string separator.</p>
5911
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
5912
     *
5913
     * @return string
5914
     */
5915 1
    public static function str_isubstr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to search for / nothing to search in
        if ($separator === '' || $str === '') {
            return '';
        }

        // Case-insensitive search for the last separator. The encoding is
        // passed on so that the offset is computed in the same encoding that is
        // used for cutting the string below.
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // fast path: everything behind the separator via "mb_substr()"
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
5943
5944
    /**
5945
     * Gets the substring before the first occurrence of a separator.
5946
     *
5947
     * @param string $str       <p>The input string.</p>
5948
     * @param string $separator <p>The string separator.</p>
5949
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
5950
     *
5951
     * @return string
5952
     */
5953 1
    public static function str_isubstr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to search for / nothing to search in
        if ($separator === '' || $str === '') {
            return '';
        }

        // Case-insensitive search for the first separator. The encoding is
        // passed on so that the offset is computed in the same encoding that is
        // used for cutting the string below.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // fast path: everything in front of the separator via "mb_substr()"
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
5973
5974
    /**
5975
     * Gets the substring before the last occurrence of a separator.
5976
     *
5977
     * @param string $str       <p>The input string.</p>
5978
     * @param string $separator <p>The string separator.</p>
5979
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
5980
     *
5981
     * @return string
5982
     */
5983 1
    public static function str_isubstr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to search in / nothing to search for
        if ($str === '' || $separator === '') {
            return '';
        }

        // UTF-8 uses the "mb_*" functions directly, everything else goes
        // through the encoding-aware wrappers of this class
        $is_utf8 = $encoding === 'UTF-8';

        // case-insensitive position of the last separator
        $last_position = $is_utf8
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);

        if ($last_position === false) {
            return '';
        }

        // everything in front of that separator
        $before = $is_utf8
            ? \mb_substr($str, 0, $last_position)
            : self::substr($str, 0, $last_position, $encoding);

        return (string) $before;
    }
6008
6009
    /**
6010
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
6011
     *
6012
     * @param string $str           <p>The input string.</p>
6013
     * @param string $needle        <p>The string to look for.</p>
6014
     * @param bool   $before_needle [optional] <p>Default: false</p>
6015
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
6016
     *
6017
     * @return string
6018
     */
6019 2
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to search in / nothing to search for
        if ($str === '' || $needle === '') {
            return '';
        }

        // case-insensitive lookup of the first occurrence, delegated to stristr()
        $part = self::stristr($str, $needle, $before_needle, $encoding);

        // "not found" (false) is reported as an empty string
        return $part === false ? '' : $part;
    }
6045
6046
    /**
6047
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
6048
     *
6049
     * @param string $str           <p>The input string.</p>
6050
     * @param string $needle        <p>The string to look for.</p>
6051
     * @param bool   $before_needle [optional] <p>Default: false</p>
6052
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
6053
     *
6054
     * @return string
6055
     */
6056 1
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to search in / nothing to search for
        if ($str === '' || $needle === '') {
            return '';
        }

        // case-insensitive lookup of the last occurrence, delegated to strrichr()
        $part = self::strrichr($str, $needle, $before_needle, $encoding);

        // "not found" (false) is reported as an empty string
        return $part === false ? '' : $part;
    }
6082
6083
    /**
6084
     * Returns the last $n characters of the string.
6085
     *
6086
     * @param string $str      <p>The input string.</p>
6087
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
6088
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6089
     *
6090
     * @return string
6091
     */
6092 12
    public static function str_last_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to return for a non-positive count or an empty string
        if ($n <= 0 || $str === '') {
            return '';
        }

        // a negative start offset counts from the end of the string
        $start = -$n;

        if ($encoding === 'UTF-8') {
            // fast path: call "mb_substr()" directly
            return (string) \mb_substr($str, $start);
        }

        // any other encoding: normalize its name, then use the wrapper of this class
        $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr($str, $start, null, $normalized_encoding);
    }
6109
6110
    /**
6111
     * Limit the number of characters in a string.
6112
     *
6113
     * @param string $str        <p>The input string.</p>
6114
     * @param int    $length     [optional] <p>Default: 100</p>
6115
     * @param string $str_add_on [optional] <p>Default: …</p>
6116
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
6117
     *
6118
     * @return string
6119
     */
6120 2
    public static function str_limit(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        // nothing to return for an empty string or a non-positive limit
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // short enough: return the input untouched (no add-on)
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Reserve room for the add-on. The value is clamped at 0 because a
            // negative length would make "mb_substr()" count from the end of
            // the string and keep almost all of it.
            $keep = \max(0, $length - (int) \mb_strlen($str_add_on));

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $keep) . $str_add_on;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // same clamping as above; the add-on is measured in the same encoding as the string
        $keep = \max(0, $length - (int) self::strlen($str_add_on, $encoding));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
    }
6147
6148
    /**
     * Limit the number of characters in a string, cutting at the last word
     * boundary (space) that fits into the limit.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($length <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            if ((int) self::strlen($str, $encoding) <= $length) {
                return $str;
            }

            // The limit falls exactly behind a word: drop the space and stop.
            if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
                return ((string) self::substr($str, 0, $length - 1, $encoding)) . $str_add_on;
            }

            $head = self::substr($str, 0, $length, $encoding);
            if ($head === false) {
                return $str_add_on;
            }

            // Throw away the (possibly cut) last word.
            $words = \explode(' ', $head);
            \array_pop($words);
            $kept = \implode(' ', $words);

            if ($kept !== '') {
                return $kept . $str_add_on;
            }

            // No complete word fits: fall back to a hard cut.
            return ((string) self::substr($head, 0, $length - 1, $encoding)) . $str_add_on;
        }

        /** @noinspection UnnecessaryCastingInspection */
        if ((int) \mb_strlen($str) <= $length) {
            return $str;
        }

        // The limit falls exactly behind a word: drop the space and stop.
        if (\mb_substr($str, $length - 1, 1) === ' ') {
            return ((string) \mb_substr($str, 0, $length - 1)) . $str_add_on;
        }

        $head = \mb_substr($str, 0, $length);

        // Throw away the (possibly cut) last word.
        $words = \explode(' ', $head);
        \array_pop($words);
        $kept = \implode(' ', $words);

        if ($kept !== '') {
            return $kept . $str_add_on;
        }

        // No complete word fits: fall back to a hard cut.
        return ((string) \mb_substr($head, 0, $length - 1)) . $str_add_on;
    }
6214
6215
    /**
     * Returns the longest common prefix between the $str1 and $str2.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_longest_common_prefix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        $prefix = '';

        if ($encoding === 'UTF-8') {
            $limit = (int) \min(\mb_strlen($str1), \mb_strlen($str2));

            $pos = 0;
            while ($pos < $limit) {
                $char = \mb_substr($str1, $pos, 1);

                // Stop at the first position where the strings differ.
                if ($char === false || $char !== \mb_substr($str2, $pos, 1)) {
                    break;
                }

                $prefix .= $char;
                ++$pos;
            }

            return $prefix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        $pos = 0;
        while ($pos < $limit) {
            $char = self::substr($str1, $pos, 1, $encoding);

            // Stop at the first position where the strings differ.
            if ($char === false || $char !== self::substr($str2, $pos, 1, $encoding)) {
                break;
            }

            $prefix .= $char;
            ++$pos;
        }

        return $prefix;
    }
6276
6277
    /**
     * Returns the longest common substring between the $str1 and $str2.
     * In the case of ties, it returns that which occurs first in $str1.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the longest common substring, or an empty string if there is none
     */
    public static function str_longest_common_substring(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        // Uses dynamic programming to solve
        // http://en.wikipedia.org/wiki/Longest_common_substring_problem

        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str1);
            $other_length = (int) \mb_strlen($str2);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str_length = (int) self::strlen($str1, $encoding);
            $other_length = (int) self::strlen($str2, $encoding);
        }

        // Return if either string is empty
        if ($str_length === 0 || $other_length === 0) {
            return '';
        }

        // Evaluated after normalization, so e.g. "utf8" takes the mb_* path too.
        $is_utf8 = $encoding === 'UTF-8';

        // Extract the characters of $str2 once instead of once per table cell.
        $other_chars = [];
        for ($j = 0; $j < $other_length; ++$j) {
            $other_chars[$j] = $is_utf8
                ? \mb_substr($str2, $j, 1)
                : self::substr($str2, $j, 1, $encoding);
        }

        $len = 0;
        $end = 0;

        // Only the previous row of the DP table is ever read, so two rolling
        // rows replace the full ($str_length + 1) x ($other_length + 1) table.
        $previous_row = \array_fill(0, $other_length + 1, 0);

        for ($i = 1; $i <= $str_length; ++$i) {
            // Invariant for the whole inner loop.
            $str_char = $is_utf8
                ? \mb_substr($str1, $i - 1, 1)
                : self::substr($str1, $i - 1, 1, $encoding);

            $current_row = [0];

            for ($j = 1; $j <= $other_length; ++$j) {
                if ($str_char === $other_chars[$j - 1]) {
                    $run = $previous_row[$j - 1] + 1;

                    // Strict ">" keeps the first occurrence in case of ties.
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $run = 0;
                }

                $current_row[$j] = $run;
            }

            $previous_row = $current_row;
        }

        if ($is_utf8) {
            return (string) \mb_substr($str1, $end - $len, $len);
        }

        return (string) self::substr($str1, $end - $len, $len, $encoding);
    }
6364
6365
    /**
     * Returns the longest common suffix between the $str1 and $str2.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_longest_common_suffix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        $suffix = '';

        if ($encoding === 'UTF-8') {
            $limit = (int) \min(
                \mb_strlen($str1, $encoding),
                \mb_strlen($str2, $encoding)
            );

            // Walk backwards from the last character of both strings.
            $from_end = 1;
            while ($from_end <= $limit) {
                $char = \mb_substr($str1, -$from_end, 1);

                if ($char === false || $char !== \mb_substr($str2, -$from_end, 1)) {
                    break;
                }

                $suffix = $char . $suffix;
                ++$from_end;
            }

            return $suffix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        // Walk backwards from the last character of both strings.
        $from_end = 1;
        while ($from_end <= $limit) {
            $char = self::substr($str1, -$from_end, 1, $encoding);

            if ($char === false || $char !== self::substr($str2, -$from_end, 1, $encoding)) {
                break;
            }

            $suffix = $char . $suffix;
            ++$from_end;
        }

        return $suffix;
    }
6429
6430
    /**
     * Returns true if $str matches the supplied pattern, false otherwise.
     *
     * The pattern is wrapped in "/" delimiters and evaluated with the "u"
     * (UTF-8) modifier, so it must be given without delimiters.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against.</p>
     *
     * @return bool whether or not $str matches the pattern
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        $regex = '/' . $pattern . '/u';

        // preg_match() yields 1 on a match, 0 on no match and false on error.
        return \preg_match($regex, $str) === 1;
    }
6442
6443
    /**
     * Returns whether or not a character exists at an index. Offsets may be
     * negative to count from the last character in the string. Implements
     * part of the ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool whether or not the index exists
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $char_count = (int) self::strlen($str, $encoding);

        // Negative offsets count from the end, so -1 is valid for any
        // non-empty string and -$char_count is the first character.
        return $offset < 0
            ? \abs($offset) <= $char_count
            : $offset < $char_count;
    }
6465
6466
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string the character at the specified index
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // The bounds check must use the same $encoding as the lookup below;
        // str_offset_exists() measures the string with that encoding.
        if (!self::str_offset_exists($str, $index, $encoding)) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6495
6496
    /**
     * Pad a UTF-8 string to a given length with another string.
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of
     *                                   'left', 'right' or 'both'
     *
     * @return string returns the padded string
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        if ($pad_type !== (int) $pad_type) {
            if ($pad_type === 'left') {
                $pad_type = \STR_PAD_LEFT;
            } elseif ($pad_type === 'right') {
                $pad_type = \STR_PAD_RIGHT;
            } elseif ($pad_type === 'both') {
                $pad_type = \STR_PAD_BOTH;
            } else {
                throw new \InvalidArgumentException(
                    'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }
        }

        // Decided BEFORE normalization: only the literal 'UTF-8' takes the
        // direct mb_* fast path, everything else goes through self::*.
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str_length = $is_utf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        if ($pad_length < $str_length) {
            return $str;
        }

        $diff = $pad_length - $str_length;

        // At least 1, so a failed length lookup can never cause a division by zero.
        $ps_length = \max(
            1,
            $is_utf8 ? (int) \mb_strlen($pad_string) : (int) self::strlen($pad_string, $encoding)
        );

        // Builds exactly $chars characters of padding by repeating $pad_string
        // only as often as needed and cutting off the overhang.
        $padding = static function (int $chars) use ($pad_string, $ps_length, $is_utf8, $encoding): string {
            if ($chars <= 0) {
                return '';
            }

            $repeated = \str_repeat($pad_string, (int) \ceil($chars / $ps_length));

            return $is_utf8
                ? (string) \mb_substr($repeated, 0, $chars)
                : (string) self::substr($repeated, 0, $chars, $encoding);
        };

        switch ($pad_type) {
            case \STR_PAD_LEFT:
                return $padding($diff) . $str;

            case \STR_PAD_BOTH:
                // With an odd difference the extra character goes to the right.
                return $padding((int) \floor($diff / 2)) . $str . $padding((int) \ceil($diff / 2));

            case \STR_PAD_RIGHT:
            default:
                return $str . $padding($diff);
        }
    }
6658
6659
    /**
     * Pads both sides of the string so that the result has the given length.
     * Alias for "UTF8::str_pad()" with a $pad_type of STR_PAD_BOTH.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The string with padding applied.</p>
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_BOTH, $encoding);
    }
6685
6686
    /**
     * Pads the beginning of the string so that the result has the given length.
     * Alias for "UTF8::str_pad()" with a $pad_type of STR_PAD_LEFT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The string with left padding.</p>
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_LEFT, $encoding);
    }
6712
6713
    /**
     * Pads the end of the string so that the result has the given length.
     * Alias for "UTF8::str_pad()" with a $pad_type of STR_PAD_RIGHT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The string with right padding.</p>
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_RIGHT, $encoding);
    }
6739
6740
    /**
     * Repeat a string.
     *
     * The input is passed through "UTF8::filter()" before it is repeated.
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of times the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @return string
     *                <p>The repeated string.</p>
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
6765
6766
    /**
     * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param mixed $search  <p>
     *                       The value being searched for, otherwise known as the needle.
     *                       An array may be used to designate multiple needles.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value that replaces found search
     *                       values. An array may be used to designate multiple replacements.
     *                       </p>
     * @param mixed $subject <p>
     *                       The string or array being searched and replaced on,
     *                       otherwise known as the haystack.
     *                       </p>
     *                       <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles
     *
     * @return mixed this function returns a string or an array with the replaced values
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        /**
         * @psalm-suppress PossiblyNullArgument
         */
        return \str_replace($search, $replace, $subject, $count);
    }
6811
6812
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * An empty $search never matches a prefix; in that case $replacement is
     * appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_beginning(
        string $str,
        string $search,
        string $replacement
    ): string {
        if ($search === '') {
            // Also covers an empty $str, where this yields just $replacement.
            return $str . $replacement;
        }

        $search_bytes = \strlen($search);

        // A byte-wise prefix comparison is sufficient here.
        if (\strncmp($str, $search, $search_bytes) !== 0) {
            return $str;
        }

        return $replacement . \substr($str, $search_bytes);
    }
6846
6847
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * An empty $search never matches a suffix; in that case $replacement is
     * appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_ending(
        string $str,
        string $search,
        string $replacement
    ): string {
        if ($search === '') {
            // Also covers an empty $str, where this yields just $replacement.
            return $str . $replacement;
        }

        $search_bytes = \strlen($search);

        // Check the length first: when $search is longer than $str, an offset
        // computed from the length difference can lie outside of $str, which
        // is a warning on PHP 7 and a ValueError on PHP 8.
        if (
            $search_bytes <= \strlen($str)
            &&
            \substr_compare($str, $search, -$search_bytes) === 0
        ) {
            return \substr($str, 0, -$search_bytes) . $replacement;
        }

        return $str;
    }
6881
6882
    /**
     * Replace the first "$search"-term with the "$replace"-term.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string that is searched.</p>
     *
     * @return string $subject with the first match replaced, or the unchanged
     *                $subject if $search was not found
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(
        string $search,
        string $replace,
        string $subject
    ): string {
        $first_pos = self::strpos($subject, $search);

        if ($first_pos === false) {
            return $subject;
        }

        $search_length = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $first_pos, $search_length);
    }
6914
6915
    /**
     * Replace the last "$search"-term with the "$replace"-term.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string that is searched.</p>
     *
     * @return string $subject with the last match replaced, or the unchanged
     *                $subject if $search was not found
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $last_pos = self::strrpos($subject, $search);

        if ($last_pos === false) {
            return $subject;
        }

        $search_length = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $last_pos, $search_length);
    }
6946
6947
    /**
     * Shuffles all the characters in the string.
     *
     * PS: uses random algorithm which is weak for cryptography purposes
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the shuffled string
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        $result = '';

        if ($encoding === 'UTF-8') {
            // Shuffle the character positions, then collect the characters in that order.
            $positions = \range(0, (int) \mb_strlen($str) - 1);
            /** @noinspection NonSecureShuffleUsageInspection */
            \shuffle($positions);

            foreach ($positions as $position) {
                $char = \mb_substr($str, $position, 1);
                if ($char === false) {
                    continue;
                }

                $result .= $char;
            }

            return $result;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // Shuffle the character positions, then collect the characters in that order.
        $positions = \range(0, (int) self::strlen($str, $encoding) - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($positions);

        foreach ($positions as $position) {
            $char = self::substr($str, $position, 1, $encoding);
            if ($char === false) {
                continue;
            }

            $result .= $char;
        }

        return $result;
    }
6993
6994
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. A negative $start or $end is counted from the end
     * of the string.
     *
     * When an $end is given, both indexes are resolved to absolute positions
     * before the length is computed; an $end that lies at or before $start
     * yields an empty string.
     *
     * @param string $str
     * @param int    $start    <p>Initial index from which to begin extraction.</p>
     * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
     *                      characters long, <b>FALSE</b> will be returned.
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        $is_utf8 = $encoding === 'UTF-8';
        if ($is_utf8 === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str_length = $is_utf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        if ($end === null) {
            // no end index: $start is handed to the substr call untouched
            $length = $str_length;
        } else {
            // Resolve negative indexes first. Passing a negative length on to
            // the substr call would mean "omit that many characters from the
            // end" instead of "nothing to extract".
            if ($start < 0) {
                $start = \max(0, $str_length + $start);
            }
            if ($end < 0) {
                $end = $str_length + $end;
            }

            if ($end <= $start) {
                return '';
            }

            $length = $end - $start;
        }

        if ($is_utf8) {
            return \mb_substr($str, $start, $length);
        }

        return self::substr($str, $start, $length, $encoding);
    }
7043
7044
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Steps: whitespace is normalized and "-" becomes "_"; every uppercase
     * letter is lower-cased and prefixed with "_"; a single ASCII digit is
     * wrapped in "_"; whitespace runs become "_"; repeated "_" are collapsed
     * and leading / trailing "_" are removed.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string in snake_case
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // numbers and uppercase letters only; a "|" inside a character
            // class is a literal pipe, not an alternation, so it must not be here
            '/([\\p{N}\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $match_int = (int) $match;

                // the round-trip only succeeds for a plain ASCII digit
                if ((string) $match_int === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        // The patterns run one after another: once every whitespace run is an
        // underscore there is no leading / trailing whitespace left to strip.
        $str = (string) \preg_replace(
            [
                '/\\s+/u', // convert spaces to "_"
                '/_+/',    // remove double "_"
            ],
            [
                '_',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
7108
7109
    /**
     * Orders the characters of a string by their code points.
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @return string string of sorted characters
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $code_points = self::codepoints($str);

        if ($unique) {
            // flipping twice leaves one entry per distinct code point
            $code_points = \array_flip(\array_flip($code_points));
        }

        if ($desc === false) {
            \asort($code_points);
        } else {
            \arsort($code_points);
        }

        return self::string($code_points);
    }
7134
7135
    /**
     * Convert a string to an array of Unicode characters.
     *
     * An array input is processed element by element (recursively): the
     * returned array keeps the input keys and holds one result list per element.
     *
     * @param int|int[]|string|string[] $str                     <p>The string to split into array.</p>
     * @param int                       $length                  [optional] <p>Max character length of each array
     *                                                           element.</p>
     * @param bool                      $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool                      $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                                           "mb_substr"</p>
     *
     * @return array
     *               <p>An array containing chunks of the input; an empty array for an empty input
     *               or a "$length" below 1.</p>
     */
    public static function str_split(
        $str,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        if (\is_array($str) === true) {
            foreach ($str as &$v) {
                $v = self::str_split(
                    $v,
                    $length,
                    $clean_utf8,
                    $try_to_use_mb_functions
                );
            }
            // drop the reference, so that "$v" can not alias the last element anymore
            unset($v);

            return $str;
        }

        // init
        $str = (string) $str;

        if ($str === '') {
            return [];
        }

        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        if (
            $try_to_use_mb_functions === true
            &&
            self::$SUPPORT['mbstring'] === true
        ) {
            $i_max = \mb_strlen($str);
            if ($i_max <= 127) {
                // short input: extract char by char
                $ret = [];
                for ($i = 0; $i < $i_max; ++$i) {
                    $ret[] = \mb_substr($str, $i, 1);
                }
            } else {
                // longer input: one regex pass
                $return_array = [];
                \preg_match_all('/./us', $str, $return_array);
                $ret = $return_array[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $return_array = [];
            \preg_match_all('/./us', $str, $return_array);
            $ret = $return_array[0] ?? [];
        } else {
            // fallback: decode the UTF-8 sequences byte by byte; a lead byte
            // without matching continuation bytes is skipped

            $ret = [];
            $len = \strlen($str);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($str[$i] & "\x80") === "\x00") {
                    // 1 byte (ASCII)
                    $ret[] = $str[$i];
                } elseif (
                    isset($str[$i + 1])
                    &&
                    ($str[$i] & "\xE0") === "\xC0"
                ) {
                    // 2 byte sequence
                    if (($str[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $str[$i] . $str[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($str[$i + 2])
                    &&
                    ($str[$i] & "\xF0") === "\xE0"
                ) {
                    // 3 byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($str[$i + 3])
                    &&
                    ($str[$i] & "\xF8") === "\xF0"
                ) {
                    // 4 byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            $ret = \array_chunk($ret, $length);

            // "array_map" hands the items over by value, so the callback must
            // not declare its parameter by reference
            return \array_map(
                static function (array $item): string {
                    return \implode('', $item);
                },
                $ret
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
7274
7275
    /**
     * Splits the string with the provided pattern, returning an array of
     * strings. An optional integer $limit will truncate the results.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @return string[] an array of strings; an empty array if the split fails or "$limit" is 0
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $parts = \mb_split($pattern, $str);

            // "mb_split" reports a failure with false, which must neither be
            // iterated nor returned from a method that promises an array
            if ($parts === false) {
                return [];
            }

            if ($limit > 0) {
                // keep the first "$limit" pieces and drop the rest
                return \array_slice($parts, 0, $limit);
            }

            return $parts;
        }

        // ask for one extra piece, so that the unsplit rest can be dropped below
        if ($limit > 0) {
            ++$limit;
        } else {
            $limit = -1;
        }

        // NOTE(review): the pattern is quoted here and therefore matched as
        // literal text, while "mb_split" above interprets it as a regular
        // expression -- confirm which behavior is intended.
        $array = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);

        if ($array === false) {
            return [];
        }

        if ($limit > 0 && \count($array) === $limit) {
            \array_pop($array);
        }

        return $array;
    }
7336
7337
    /**
     * Check if the string starts with the given substring.
     *
     * The comparison is byte-wise and case-sensitive. An empty needle always
     * matches, even against an empty haystack.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // compare the leading bytes only, instead of searching the whole
        // haystack for the needle and checking the position afterwards
        return \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
7357
7358
    /**
     * Tells whether the string begins with at least one of the given substrings.
     *
     * - case-sensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool true if $str starts with any entry of $substrings; false for an empty $str or an empty list
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_starts_with($str, $candidate) === true) {
                return true;
            }
        }

        return false;
    }
7386
7387
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string the text behind the separator; an empty string if $str or $separator is empty,
     *                or if the separator does not occur
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // go through the class' own "substr" (like the sibling separator
        // helpers do), instead of calling "mb_substr" with the raw encoding name
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
7426
7427
    /**
     * Returns everything that follows the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string the text behind the separator; an empty string if $str or $separator is empty,
     *                or if the separator does not occur
     */
    public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $position = self::strrpos($str, $separator, 0, $encoding);

            return $position === false
                ? ''
                : (string) self::substr(
                    $str,
                    $position + (int) self::strlen($separator, $encoding),
                    null,
                    $encoding
                );
        }

        $position = \mb_strrpos($str, $separator);
        if ($position === false) {
            return '';
        }

        // skip the separator itself
        $from = $position + (int) \mb_strlen($separator);

        return (string) \mb_substr($str, $from);
    }
7466
7467
    /**
     * Returns everything that precedes the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string the text in front of the separator; an empty string if $str or $separator is empty,
     *                or if the separator does not occur
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $position = self::strpos($str, $separator, 0, $encoding);

            return $position === false
                ? ''
                : (string) self::substr($str, 0, $position, $encoding);
        }

        $position = \mb_strpos($str, $separator);

        return $position === false
            ? ''
            : (string) \mb_substr($str, 0, $position);
    }
7510
7511
    /**
     * Returns everything that precedes the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string the text in front of the separator; an empty string if $str or $separator is empty,
     *                or if the separator does not occur
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $position = self::strrpos($str, $separator, 0, $encoding);
            if ($position === false) {
                return '';
            }

            // the encoding name is normalized only for the extraction step
            $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, 0, $position, $normalized_encoding);
        }

        $position = \mb_strrpos($str, $separator);

        return $position === false
            ? ''
            : (string) \mb_substr($str, 0, $position);
    }
7553
7554
    /**
     * Returns the part of the string from the first occurrence of "$needle" on
     * (needle included), or the part in front of it if "$before_needle" is set.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string the matching part; an empty string if $str or $needle is empty, or nothing was found
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        $found = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $before_needle)
            : self::strstr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
7598
7599
    /**
     * Returns the part of the string from the last occurrence of "$needle" on
     * (needle included), or the part in front of it if "$before_needle" is set.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string the matching part; an empty string if $str or $needle is empty, or nothing was found
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        $found = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $before_needle)
            : self::strrchr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
7643
7644
    /**
     * Wraps $str in the given substring.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @return string string with the substring both prepended and appended
     */
    public static function str_surround(string $str, string $substring): string
    {
        return \implode('', [$substring, $str, $substring]);
    }
7656
7657
    /**
     * Returns a trimmed string with the first letter of each word capitalized.
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * A "word" is a run of characters that contains neither whitespace nor any
     * of the characters from $word_define_chars. Every word that is not listed
     * in $ignore gets an upper-case first character and a lower-case rest.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                        [optional] <p>An array of words not to capitalize or null.
     *                                                           Default: null</p>
     * @param string              $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null         $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                           tr</p>
     * @param bool                $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                           ß</p>
     * @param bool                $use_trim_first                [optional] <p>true === trim the input string, first</p>
     * @param string|null         $word_define_chars             [optional] <p>A string of chars that will be used as whitespace separator === words.</p>
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false,
        bool $use_trim_first = true,
        string $word_define_chars = null
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($use_trim_first === true) {
            $str = \trim($str);
        }

        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        // the plain "mb_*" functions are only used if no special handling was requested
        $use_mb_functions = $lang === null && $try_to_keep_the_string_length === false;

        // compare explicitly: a truthiness check would discard the valid separator "0"
        if ($word_define_chars !== null && $word_define_chars !== '') {
            $word_define_chars = \preg_quote($word_define_chars, '/');
        } else {
            $word_define_chars = '';
        }

        $str = (string) \preg_replace_callback(
            '/([^\\s' . $word_define_chars . ']+)/u',
            static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
                // ignored words are kept exactly as they were written
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                    return $match[0];
                }

                if ($use_mb_functions === true) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                               . \mb_strtolower(\mb_substr($match[0], 1));
                    }

                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
                }

                return self::ucfirst(
                    self::strtolower(
                        $match[0],
                        $encoding,
                        false,
                        $lang,
                        $try_to_keep_the_string_length
                    ),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            },
            $str
        );

        return $str;
    }
7747
7748
    /**
7749
     * Returns a trimmed string in proper title case.
7750
     *
7751
     * Also accepts an array, $ignore, allowing you to list words not to be
7752
     * capitalized.
7753
     *
7754
     * Adapted from John Gruber's script.
7755
     *
7756
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
7757
     *
7758
     * @param string $str
7759
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
7760
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7761
     *
7762
     * @return string the titleized string
7763
     */
7764 35
    public static function str_titleize_for_humans(
7765
        string $str,
7766
        array $ignore = [],
7767
        string $encoding = 'UTF-8'
7768
    ): string {
7769 35
        $small_words = \array_merge(
7770
            [
7771 35
                '(?<!q&)a',
7772
                'an',
7773
                'and',
7774
                'as',
7775
                'at(?!&t)',
7776
                'but',
7777
                'by',
7778
                'en',
7779
                'for',
7780
                'if',
7781
                'in',
7782
                'of',
7783
                'on',
7784
                'or',
7785
                'the',
7786
                'to',
7787
                'v[.]?',
7788
                'via',
7789
                'vs[.]?',
7790
            ],
7791 35
            $ignore
7792
        );
7793
7794 35
        $small_words_rx = \implode('|', $small_words);
7795 35
        $apostrophe_rx = '(?x: [\'’] [[:lower:]]* )?';
7796
7797 35
        $str = \trim($str);
7798
7799 35
        if (self::has_lowercase($str) === false) {
7800 2
            $str = self::strtolower($str, $encoding);
7801
        }
7802
7803
        // the main substitutions
7804 35
        $str = (string) \preg_replace_callback(
7805
            '~\\b (_*) (?:                                                         # 1. Leading underscore and
7806
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
7807 35
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_rx . ' ) #    URL, domain, or email
7808
                        |
7809 35
                        ( (?i: ' . $small_words_rx . ' ) ' . $apostrophe_rx . ' )            # 3. or small word (case-insensitive)
7810
                        |
7811 35
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 4. or word w/o internal caps
7812
                        |
7813 35
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 5. or some other word
7814
                      ) (_*) \\b                                                          # 6. With trailing underscore
7815
                    ~ux',
7816
            /**
7817
             * @param string[] $matches
7818
             *
7819
             * @return string
7820
             */
7821
            static function (array $matches) use ($encoding): string {
7822
                // preserve leading underscore
7823 35
                $str = $matches[1];
7824 35
                if ($matches[2]) {
7825
                    // preserve URLs, domains, emails and file paths
7826 5
                    $str .= $matches[2];
7827 35
                } elseif ($matches[3]) {
7828
                    // lower-case small words
7829 25
                    $str .= self::strtolower($matches[3], $encoding);
7830 35
                } elseif ($matches[4]) {
7831
                    // capitalize word w/o internal caps
7832 34
                    $str .= static::ucfirst($matches[4], $encoding);
7833
                } else {
7834
                    // preserve other kinds of word (iPhone)
7835 7
                    $str .= $matches[5];
7836
                }
7837
                // preserve trailing underscore
7838 35
                $str .= $matches[6];
7839
7840 35
                return $str;
7841 35
            },
7842 35
            $str
7843
        );
7844
7845
        // Exceptions for small words: capitalize at start of title...
7846 35
        $str = (string) \preg_replace_callback(
7847
            '~(  \\A [[:punct:]]*            # start of title...
7848
                      |  [:.;?!][ ]+                # or of subsentence...
7849
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
7850 35
                      ( ' . $small_words_rx . ' ) \\b # ...followed by small word
7851
                     ~uxi',
7852
            /**
7853
             * @param string[] $matches
7854
             *
7855
             * @return string
7856
             */
7857
            static function (array $matches) use ($encoding): string {
7858 11
                return $matches[1] . static::ucfirst($matches[2], $encoding);
7859 35
            },
7860 35
            $str
7861
        );
7862
7863
        // ...and end of title
7864 35
        $str = (string) \preg_replace_callback(
7865 35
            '~\\b ( ' . $small_words_rx . ' ) # small word...
7866
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
7867
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
7868
                     ~uxi',
7869
            /**
7870
             * @param string[] $matches
7871
             *
7872
             * @return string
7873
             */
7874
            static function (array $matches) use ($encoding): string {
7875 3
                return static::ucfirst($matches[1], $encoding);
7876 35
            },
7877 35
            $str
7878
        );
7879
7880
        // Exceptions for small words in hyphenated compound words.
7881
        // e.g. "in-flight" -> In-Flight
7882 35
        $str = (string) \preg_replace_callback(
7883
            '~\\b
7884
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
7885 35
                        ( ' . $small_words_rx . ' )
7886
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
7887
                       ~uxi',
7888
            /**
7889
             * @param string[] $matches
7890
             *
7891
             * @return string
7892
             */
7893
            static function (array $matches) use ($encoding): string {
7894
                return static::ucfirst($matches[1], $encoding);
7895 35
            },
7896 35
            $str
7897
        );
7898
7899
        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
7900 35
        $str = (string) \preg_replace_callback(
7901
            '~\\b
7902
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
7903
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
7904 35
                      ( ' . $small_words_rx . ' ) # ...followed by small word
7905
                      (?!	- )                 # Negative lookahead for another -
7906
                     ~uxi',
7907
            /**
7908
             * @param string[] $matches
7909
             *
7910
             * @return string
7911
             */
7912
            static function (array $matches) use ($encoding): string {
7913
                return $matches[1] . static::ucfirst($matches[2], $encoding);
7914 35
            },
7915 35
            $str
7916
        );
7917
7918 35
        return $str;
7919
    }
7920
7921
    /**
     * Get a binary representation of a specific string.
     *
     * Every byte of the input is expanded to its bit pattern and the patterns
     * are concatenated. Leading zero bits of the complete result are dropped
     * (an all-zero or empty input yields "0"), which is the same output that
     * "base_convert()" produces for short input.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      <p>false on error</p>
     */
    public static function str_to_binary(string $str)
    {
        /** @var array|false $value - needed for PhpStan (stubs error) */
        $value = \unpack('H*', $str);
        if ($value === false) {
            return false;
        }

        // Expand the hex dump digit by digit instead of calling "base_convert()":
        // that function converts through a float and therefore silently loses
        // precision as soon as the input is longer than a few bytes.
        $hex_to_bits = [
            '0' => '0000',
            '1' => '0001',
            '2' => '0010',
            '3' => '0011',
            '4' => '0100',
            '5' => '0101',
            '6' => '0110',
            '7' => '0111',
            '8' => '1000',
            '9' => '1001',
            'a' => '1010',
            'b' => '1011',
            'c' => '1100',
            'd' => '1101',
            'e' => '1110',
            'f' => '1111',
        ];

        /** @noinspection OffsetOperationsInspection */
        $bits = \ltrim(\strtr($value[1], $hex_to_bits), '0');

        // keep the "0" that "base_convert()" returned for empty / all-zero input
        return $bits === '' ? '0' : $bits;
    }
7940
7941
    /**
     * Split a string into an array of lines.
     *
     * INFO: one or two consecutive "\r" / "\n" characters are treated as a
     * single line break.
     *
     * @param string   $str
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
    {
        // result for empty input and for a failed split
        $fallback = $remove_empty_values === true ? [] : [''];

        if ($str === '') {
            return $fallback;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split("[\r\n]{1,2}", $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        if ($lines === false) {
            return $fallback;
        }

        $needs_filtering = $remove_empty_values !== false || $remove_short_values !== null;
        if ($needs_filtering === false) {
            return $lines;
        }

        // filtering of empty / short entries is delegated to the shared helper
        return self::reduce_string_array(
            $lines,
            $remove_empty_values,
            $remove_short_values
        );
    }
7979
7980
    /**
     * Convert a string into an array of words.
     *
     * INFO: the string is split with the words themselves as captured
     * delimiters, so the unfiltered result alternates between the text
     * around a word (even indexes) and the word itself (odd indexes).
     *
     * @param string   $str
     * @param string   $char_list           <p>Additional chars for the definition of "words".</p>
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $char_list = '',
        bool $remove_empty_values = false,
        int $remove_short_values = null
    ): array {
        // result for empty input and for a failed split
        $fallback = $remove_empty_values === true ? [] : [''];

        if ($str === '') {
            return $fallback;
        }

        $word_class = self::rxClass($char_list, '\pL');

        $parts = \preg_split(
            "/({$word_class}+(?:[\p{Pd}’']{$word_class}+)*)/u",
            $str,
            -1,
            \PREG_SPLIT_DELIM_CAPTURE
        );
        if ($parts === false) {
            return $fallback;
        }

        if ($remove_empty_values === false && $remove_short_values === null) {
            return $parts;
        }

        $reduced = self::reduce_string_array(
            $parts,
            $remove_empty_values,
            $remove_short_values
        );

        // make sure every remaining entry is a string (keys are preserved)
        return \array_map(
            static function ($item): string {
                return (string) $item;
            },
            $reduced
        );
    }
8027
8028
    /**
     * alias for "UTF8::to_ascii()"
     *
     * All arguments are forwarded unchanged.
     *
     * @param string $str
     * @param string $unknown
     * @param bool   $strict
     *
     * @return string
     *
     * @see UTF8::to_ascii()
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
     */
    public static function str_transliterate(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
8047
8048
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * INFO: if $substring is longer than $length it cannot fit and is not
     * appended; a negative $length is handled like 0. The result is therefore
     * never longer than max(0, $length) characters when truncating occurs.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // fast path: plain "mb_*" calls for the default encoding
        $use_native_mb = $encoding === 'UTF-8';
        if ($use_native_mb === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str_length = $use_native_mb
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);
        if ($length >= $str_length) {
            // nothing to truncate
            return $str;
        }

        // a negative length would make the substr functions count from the
        // end of the string and return more than requested
        $length = \max(0, $length);

        if ($substring !== '') {
            $substring_length = $use_native_mb
                ? (int) \mb_strlen($substring)
                : (int) self::strlen($substring, $encoding);

            if ($substring_length > $length) {
                // the substring can not fit into the desired length
                $substring = '';
            } else {
                // reserve room for the substring
                $length -= $substring_length;
            }
        }

        $truncated = $use_native_mb
            ? \mb_substr($str, 0, $length)
            : self::substr($str, 0, $length, $encoding);

        return (string) $truncated . $substring;
    }
8105
8106
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * INFO: only $substring is returned if $str is empty, if $length is not
     * positive, or if no room is left once the substring length was subtracted.
     *
     * @param string $str
     * @param int    $length                                 <p>Desired length of the truncated string.</p>
     * @param string $substring                              [optional] <p>The substring to append if it can fit. Default:
     *                                                       ''</p>
     * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>true === keep a cut first word instead of
     *                                                       dropping it, if the truncated part contains no space.
     *                                                       Default: false</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignore_do_not_split_words_for_one_word = false
    ): string {
        if ($length <= 0 || $str === '') {
            return $substring;
        }

        // the default encoding uses plain "mb_*" calls, everything else goes
        // through the encoding aware wrappers of this class
        $use_native_mb = $encoding === 'UTF-8';
        if ($use_native_mb === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str_length = $use_native_mb
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);
        if ($length >= $str_length) {
            return $str;
        }

        // need to further trim the string so we can append the substring
        $length -= $use_native_mb
            ? (int) \mb_strlen($substring)
            : (int) self::strlen($substring, $encoding);
        if ($length <= 0) {
            return $substring;
        }

        /** @var string|false $truncated - needed for PhpStan (stubs error) */
        $truncated = $use_native_mb
            ? \mb_substr($str, 0, $length)
            : self::substr($str, 0, $length, $encoding);
        if ($truncated === false) {
            return '';
        }

        // a space directly behind the cut means that no word was split
        $space_position = $use_native_mb
            ? \mb_strpos($str, ' ', $length - 1)
            : self::strpos($str, ' ', $length - 1, $encoding);
        if ($space_position === $length) {
            return $truncated . $substring;
        }

        // the last word was truncated: find the last space in front of the cut
        $last_position = $use_native_mb
            ? \mb_strrpos($truncated, ' ', 0)
            : self::strrpos($truncated, ' ', 0, $encoding);

        $cut_at_last_space = $last_position !== false
                             ||
                             ($space_position !== false && $ignore_do_not_split_words_for_one_word === false);

        if ($cut_at_last_space) {
            // INFO: without any space in the truncated part the position is "false" => 0,
            //       so the whole (split) word is dropped
            $truncated = $use_native_mb
                ? (string) \mb_substr($truncated, 0, (int) $last_position)
                : (string) self::substr($truncated, 0, (int) $last_position, $encoding);
        }

        return $truncated . $substring;
    }
8200
8201
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * INFO: shortcut for "UTF8::str_delimit()" with "_" as delimiter
     *
     * @param string $str
     *
     * @return string the underscored string
     */
    public static function str_underscored(string $str): string
    {
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
8215
8216
    /**
     * Returns an UpperCamelCase version of the supplied string. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * INFO: the string is camelized via "UTF8::str_camelize()" first, then the
     * first character is upper-cased via "UTF8::ucfirst()"; $clean_utf8, $lang
     * and $try_to_keep_the_string_length are only passed to the second step.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string in UpperCamelCase
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst(
            $camelized,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }
8238
8239
    /**
     * alias for "UTF8::ucfirst()"
     *
     * All arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @return string
     *
     * @see UTF8::ucfirst()
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
     */
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        return self::ucfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
    }
8268
8269
    /**
     * Get the number of words in a specific string.
     *
     * INFO: the words are taken from "UTF8::str_to_words()", where every entry
     * with an odd index is a word and the entries around it are the text
     * between the words.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $format    [optional] <p>
     *                          <strong>0</strong> => return a number of words (default)<br>
     *                          <strong>1</strong> => return an array of words<br>
     *                          <strong>2</strong> => return an array of words with word-offset as key
     *                          </p>
     * @param string $char_list [optional] <p>Additional chars that contains to words and do not start a new word.</p>
     *
     * @return int|string[] The number of words in the string
     */
    public static function str_word_count(string $str, int $format = 0, string $char_list = '')
    {
        $parts = self::str_to_words($str, $char_list);
        $part_count = \count($parts);

        // every other format value only counts the words
        if ($format !== 1 && $format !== 2) {
            return (int) (($part_count - 1) / 2);
        }

        $words = [];

        if ($format === 1) {
            for ($index = 1; $index < $part_count; $index += 2) {
                $words[] = $parts[$index];
            }

            return $words;
        }

        // format 2: the key is the offset of the word, starting behind the leading non-word part
        $offset = (int) self::strlen($parts[0]);
        for ($index = 1; $index < $part_count; $index += 2) {
            $word = $parts[$index];
            $words[$offset] = $word;

            // skip the word itself and the non-word part that follows it
            $offset += (int) self::strlen($word) + (int) self::strlen($parts[$index + 1]);
        }

        return $words;
    }
8306
8307
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp(); both strings are
     * case-folded via "UTF8::strtocasefold()" before they are compared.
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): int {
        $folded_str1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_str2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded_str1, $folded_str2);
    }
8345
8346
    /**
     * alias for "UTF8::strstr()"
     *
     * All arguments are forwarded unchanged.
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $clean_utf8
     *
     * @return false|string
     *
     * @see UTF8::strstr()
     * @deprecated <p>please use "UTF8::strstr()"</p>
     */
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        return self::strstr($haystack, $needle, $before_needle, $encoding, $clean_utf8);
    }
8375
8376
    /**
     * Case-sensitive string comparison.
     *
     * INFO: both strings are normalized (NFD) before the byte-wise comparison,
     * so canonically equivalent strings are equal. A string that can not be
     * normalized (e.g. invalid UTF-8) is compared as it is.
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        // identical strings need no normalization
        if ($str1 === $str2) {
            return 0;
        }

        $normalized_str1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        $normalized_str2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // "Normalizer::normalize()" returns false on error: fall back to the
        // raw string, otherwise two different invalid strings would be handed
        // to "strcmp()" as the same (non-string) value
        return \strcmp(
            $normalized_str1 === false ? $str1 : $normalized_str1,
            $normalized_str2 === false ? $str2 : $normalized_str2
        );
    }
8398
8399
    /**
     * Find length of initial segment not matching mask.
     *
     * INFO: if $offset and / or $length is given, only that part of the string
     * is inspected.
     *
     * @param string $str
     * @param string $char_list
     * @param int    $offset
     * @param int    $length
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     */
    public static function strcspn(
        string $str,
        string $char_list,
        int $offset = null,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // without a mask nothing can match: the whole string is the segment
        if ($char_list === '') {
            return (int) self::strlen($str, $encoding);
        }

        if ($offset !== null || $length !== null) {
            if ($encoding !== 'UTF-8') {
                /** @noinspection UnnecessaryCastingInspection */
                $segment = self::substr($str, (int) $offset, $length, $encoding);
            } elseif ($length !== null) {
                /** @noinspection UnnecessaryCastingInspection */
                $segment = \mb_substr($str, (int) $offset, $length);
            } else {
                /** @noinspection UnnecessaryCastingInspection */
                $segment = \mb_substr($str, (int) $offset);
            }

            if ($segment === false) {
                return 0;
            }

            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $str = $segment;
        }

        if ($str === '') {
            return 0;
        }

        // capture (non-greedy) everything in front of the first char from the mask
        $found = [];
        if (!\preg_match('/^(.*?)' . self::rxClass($char_list) . '/us', $str, $found)) {
            // no char from the mask in the string
            return (int) self::strlen($str, $encoding);
        }

        $prefix_length = self::strlen($found[1], $encoding);

        return $prefix_length === false ? 0 : $prefix_length;
    }
8463
8464
    /**
     * alias for "UTF8::stristr()"
     *
     * All arguments are forwarded unchanged.
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $clean_utf8
     *
     * @return false|string
     *
     * @see UTF8::stristr()
     * @deprecated <p>please use "UTF8::stristr()"</p>
     */
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        return self::stristr($haystack, $needle, $before_needle, $encoding, $clean_utf8);
    }
8493
8494
    /**
     * Create a UTF-8 string from code points.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * Every entry is converted via "UTF8::chr()" and the results are
     * concatenated in the order of the array.
     *
     * @param array $array <p>Integer or Hexadecimal codepoints.</p>
     *
     * @return string UTF-8 encoded string
     */
    public static function string(array $array): string
    {
        $to_char = [self::class, 'chr'];
        $chars = \array_map($to_char, $array);

        return \implode('', $chars);
    }
8516
8517
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * INFO: the string is checked against every key of the internal BOM table.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     */
    public static function string_has_bom(string $str): bool
    {
        // Only the keys (the BOM byte sequences) are needed. Iterating by value
        // avoids the by-reference loop over the shared static table.
        foreach (\array_keys(self::$BOM) as $bom_string) {
            $bom_string = (string) $bom_string;

            // prefix check: compares the first bytes only, instead of
            // searching the whole string for the BOM
            if (\strncmp($str, $bom_string, \strlen($bom_string)) === 0) {
                return true;
            }
        }

        return false;
    }
8537
8538
    /**
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string $str            <p>
     *                               The input string.
     *                               </p>
     * @param string $allowable_tags [optional] <p>
     *                               You can use the optional second parameter to specify tags which should
     *                               not be stripped.
     *                               </p>
     *                               <p>
     *                               HTML comments and PHP tags are also stripped. This is hardcoded and
     *                               can not be changed with allowable_tags.
     *                               </p>
     * @param bool   $clean_utf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     *                <p>The stripped string.</p>
     */
    public static function strip_tags(
        string $str,
        string $allowable_tags = null,
        bool $clean_utf8 = false
    ): string {
        if ($str === '') {
            return '';
        }

        // the cleanup (if requested) runs before the tags are stripped
        $input = $clean_utf8 === true ? self::clean($str) : $str;

        return $allowable_tags === null
            ? \strip_tags($input)
            : \strip_tags($input, $allowable_tags);
    }
8578
8579
    /**
     * Strip all whitespace characters. This includes tabs and newline
     * characters, as well as multibyte whitespace such as the thin space
     * and ideographic space.
     *
     * INFO: an empty string is returned if the replacement fails.
     *
     * @param string $str
     *
     * @return string
     */
    public static function strip_whitespace(string $str): string
    {
        return $str === ''
            ? ''
            : (string) \preg_replace('/[[:space:]]+/u', '', $str);
    }
8596
8597
    /**
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
     *
     * INFO: "mb_stripos()" is used if mbstring is available; otherwise
     * "grapheme_stripos()", the native "stripos()" (ASCII only input) and a
     * case-folded "UTF8::strpos()" are tried in that order.
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle     <p>The string to find in haystack.</p>
     * @param int    $offset     [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stripos($haystack, $needle, $offset, $encoding);
            }

            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // INFO: "grapheme_stripos()" can't handle other encodings or a negative offset
        $can_use_grapheme = $encoding === 'UTF-8'
                            &&
                            $offset >= 0
                            &&
                            self::$SUPPORT['intl'] === true;

        if ($can_use_grapheme) {
            $grapheme_position = \grapheme_stripos($haystack, $needle, $offset);
            if ($grapheme_position !== false) {
                return $grapheme_position;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($folded_haystack, $folded_needle, $offset, $encoding);
    }
8672
8673
    /**
     * Case-insensitive search: returns all of haystack starting from and including the first occurrence of needle
     * to the end (or, with $before_needle, everything in front of that occurrence).
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found or one of the
     *                      inputs is empty.<br>If the needle becomes empty through $clean_utf8, the (cleaned)
     *                      haystack is returned unchanged.</p>
     */
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // Cleaning may have emptied the needle. Compare with '' explicitly:
        // a loose "!$needle" check would also treat the valid needle "0" as empty
        // and wrongly return the whole haystack.
        if ($needle === '') {
            return $haystack;
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php: capture (non-greedy) everything in front of the first match
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // skip as many characters as the captured prefix is long
        return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
    }
8757
8758
    /**
     * Count the characters of a string (character length, not byte length).
     *
     * The first available strategy wins: mbstring, plain byte count for "CP850" / "ASCII",
     * iconv, intl (UTF-8 only), plain byte count for ASCII-only input and finally a regex based count.
     *
     * @see http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str        <p>The string being checked for length.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   <p>
     *                   The number <strong>(int)</strong> of characters in $str for the given $encoding
     *                   (one multi-byte character counts as 1), <strong>0</strong> for an empty string.
     *                   <br>
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process
     *                   invalid chars.
     *                   </p>
     */
    public static function strlen(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        // 1) mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strlen($str)
                : \mb_strlen($str, $encoding);
        }

        // 2) binary || ascii only: one byte is one character
        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strlen($str);
        }

        // warn when no extension is left that understands the requested encoding
        if ($encoding !== 'UTF-8') {
            if (self::$SUPPORT['mbstring'] === false && self::$SUPPORT['iconv'] === false) {
                \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
            }
        }

        // 3) iconv
        if (self::$SUPPORT['iconv'] === true) {
            $iconv_length = \iconv_strlen($str, $encoding);
            if ($iconv_length !== false) {
                return $iconv_length;
            }
        }

        // 4) intl, INFO: "grapheme_strlen()" can't handle other encodings
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $grapheme_length = \grapheme_strlen($str);
            if ($grapheme_length !== null) {
                return $grapheme_length;
            }
        }

        // 5) ascii only
        if (ASCII::is_ascii($str)) {
            return \strlen($str);
        }

        // 6) vanilla php: count the matches of "any single character" in UTF-8 mode
        \preg_match_all('/./us', $str, $chars);
        $length = \count($chars[0]);

        // no match at all for a non-empty string is reported as failure
        return $length === 0 ? false : $length;
    }
8877
8878
    /**
     * Get the length of a string in bytes (not in characters).
     *
     * @param string $str <p>The string to measure.</p>
     *
     * @return int
     *             <p>The number of bytes, <strong>0</strong> for an empty string.</p>
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // the common case: no function overloading, the native function is used
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strlen($str);
        }

        // "mb_" is available if overload is used, so use it
        // with an 8-BIT charset ("CP850")
        return \mb_strlen($str, 'CP850');
    }
8898
8899
    /**
     * Case-insensitive "natural order" comparison of two strings.
     *
     * INFO: natural order version of UTF8::strcasecmp(); both strings are case-folded
     *       and then handed to UTF8::strnatcmp()
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded_str1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_str2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded_str1, $folded_str2);
    }
8920
8921
    /**
     * Compare two strings using a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcmp(); both strings are passed through
     *       UTF8::strtonatfold() before the native comparison
     *
     * @see http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        // identical strings need no folding at all
        if ($str1 === $str2) {
            return 0;
        }

        $folded_str1 = (string) self::strtonatfold($str1);
        $folded_str2 = (string) self::strtonatfold($str2);

        return \strnatcmp($folded_str1, $folded_str2);
    }
8947
8948
    /**
     * Case-insensitive string comparison of the first n characters.
     *
     * Both strings are case-folded and then compared via UTF8::strncmp().
     *
     * @see http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        return self::strncmp(
            self::strtocasefold($str1, true, false, $encoding, null, false),
            self::strtocasefold($str2, true, false, $encoding, null, false),
            $len,
            // forward the charset, otherwise the strings are always cut to
            // $len characters as UTF-8, even if another encoding was requested
            $encoding
        );
    }
8975
8976
    /**
     * String comparison of the first n characters.
     *
     * Both strings are cut to their first $len characters and then compared via UTF8::strcmp().
     *
     * @see http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // other encodings: use the encoding-aware substr() of this class
        if ($encoding !== 'UTF-8') {
            return self::strcmp(
                (string) self::substr($str1, 0, $len, $encoding),
                (string) self::substr($str2, 0, $len, $encoding)
            );
        }

        // UTF-8: call mbstring directly
        return self::strcmp(
            (string) \mb_substr($str1, 0, $len),
            (string) \mb_substr($str2, 0, $len)
        );
    }
9011
9012
    /**
     * Search a string for any of a set of characters.
     *
     * @see http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>The characters to search for. This parameter is case-sensitive.</p>
     *
     * @return false|string
     *                      <p>The rest of haystack, starting from the first character found,<br>
     *                      or <strong>false</strong> if none is found or one of the inputs is empty.</p>
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($char_list === '' || $haystack === '') {
            return false;
        }

        // match the first character of the haystack that belongs to the char-list
        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $hit)) {
            return false;
        }

        // cut the haystack (byte-wise) at the first occurrence of the matched character
        $byte_offset = (int) \strpos($haystack, $hit[0]);

        return \substr($haystack, $byte_offset);
    }
9034
9035
    /**
     * Find the position of the first occurrence of a substring in a string.
     *
     * The first available strategy wins: mbstring, native strpos() for "CP850" / "ASCII",
     * intl (UTF-8 and non-negative offset only), iconv (non-negative offset only),
     * native strpos() for ASCII-only input and finally a pure PHP fallback.
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>The search offset. If it is not specified, 0 is used.<br>
     *                               A negative offset counts from the end of haystack.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string (always relative to the beginning of haystack).<br> If needle is not found or
     *                   haystack / needle is empty it returns false.
     */
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strpos($haystack, $needle, $offset);
            }

            return \mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $return_tmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        if ($offset < 0) {
            // A negative offset counts from the end: translate it into an absolute
            // character offset, so that the result below stays relative to the
            // beginning of the haystack (and not to the beginning of the cut-off tail).
            $offset += (int) self::strlen($haystack, $encoding);
            if ($offset < 0) {
                $offset = 0;
            }
        }

        // search only in the part of the haystack behind the offset
        $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystack_tmp === false) {
            $haystack_tmp = '';
        }
        $haystack = (string) $haystack_tmp;

        // byte-wise search ...
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        // ... then convert the byte position into a character position
        if ($pos) {
            return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        return $offset;
    }
9182
9183
    /**
     * Find the byte position of the first occurrence of a substring in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @return false|int
     *                   <p>The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found or one of the inputs is empty,
     *                   it returns false.</p>
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // the common case: no function overloading, the native function is used
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strpos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it
        // with an 8-BIT charset ("CP850")
        return \mb_strpos($haystack, $needle, $offset, 'CP850');
    }
9212
9213
    /**
     * Find the last occurrence of a character in a string within another.
     *
     * INFO: without mbstring only the first character of needle is searched for.
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end.
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      <p>The portion of haystack,<br>or <strong>false</strong> if needle is not found
     *                      or one of the inputs is empty.</p>
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // 1) mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        // 2) binary || ascii only (the native function has no "before needle" mode)
        $is_single_byte = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($before_needle === false && $is_single_byte) {
            return \strrchr($haystack, $needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // 3) iconv or vanilla php: both look for the first character of the needle only
        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }
        $first_char = (string) $first_char;

        $pos = self::$SUPPORT['iconv'] === true
            ? \iconv_strrpos($haystack, $first_char, $encoding)
            : self::strrpos($haystack, $first_char, 0, $encoding);
        if ($pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $pos, $encoding)
            : self::substr($haystack, $pos, null, $encoding);
    }
9335
9336
    /**
     * Reverse the order of the characters in a string.
     *
     * The string is passed through UTF8::emoji_encode() before and UTF8::emoji_decode() after
     * the reversal (presumably so that emoji are moved as a whole; see those methods).
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The string with characters in the reverse sequence.</p>
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::emoji_encode($str, true);
        $reversed = '';

        if ($encoding !== 'UTF-8') {
            // other encodings: use the encoding-aware helpers of this class
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            for ($idx = (int) self::strlen($str, $encoding) - 1; $idx >= 0; --$idx) {
                $char = self::substr($str, $idx, 1, $encoding);
                if ($char !== false) {
                    $reversed .= $char;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($idx = (int) \grapheme_strlen($str) - 1; $idx >= 0; --$idx) {
                $char = \grapheme_substr($str, $idx, 1);
                if ($char !== false) {
                    $reversed .= $char;
                }
            }
        } else {
            // UTF-8 without intl: walk backwards through the string via mbstring
            for ($idx = (int) \mb_strlen($str) - 1; $idx >= 0; --$idx) {
                $char = \mb_substr($str, $idx, 1);
                if ($char !== false) {
                    $reversed .= $char;
                }
            }
        }

        return self::emoji_decode($reversed, true);
    }
9388
9389
    /**
     * Find the last occurrence of a character in a string within another, case-insensitive.
     *
     * INFO: without mbstring only the first character of needle is searched for.
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end.
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      <p>The portion of haystack,<br>or <strong>false</strong> if needle is not found
     *                      or one of the inputs is empty.</p>
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // 1) mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        // 2) vanilla php: locate the last occurrence of the needle's first character
        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }

        $pos = self::strripos($haystack, (string) $first_char, 0, $encoding);
        if ($pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $pos, $encoding)
            : self::substr($haystack, $pos, null, $encoding);
    }
9464
9465
    /**
     * Case-insensitive lookup of the position of the last occurrence of a substring.
     *
     * Backends are tried in this order: mbstring, native byte functions (for CP850 / ASCII),
     * intl (grapheme), native byte functions (for pure ASCII input) and finally a
     * case-folded call to UTF8::strrpos().
     *
     * @param string     $haystack   <p>The string to look in.</p>
     * @param int|string $needle     <p>The string to look for. A non-negative integer is converted
     *                               to a character via UTF8::chr().</p>
     * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If haystack or needle is empty, or needle is not found, it returns false.</p>
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ($needle === (int) $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // the two most common charsets are already in canonical form
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strripos($haystack, $needle, $offset);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        $intl_usable = self::$SUPPORT['intl'] === true
                       &&
                       $offset >= 0 // grapheme_strripos() can't handle negative offset
                       &&
                       $encoding === 'UTF-8'; // INFO: "grapheme_strripos()" can't handle other encodings

        if ($intl_usable) {
            $grapheme_pos = \grapheme_strripos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
            // a "false" result is not trusted here: the remaining fallbacks get a chance too
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both strings, then do a case-sensitive search
        //

        return self::strrpos(
            self::strtocasefold($haystack, true, false, $encoding),
            self::strtocasefold($needle, true, false, $encoding),
            $offset,
            $encoding,
            $clean_utf8
        );
    }
9575
9576
    /**
     * Byte-wise, case-insensitive lookup of the position of the last occurrence of a string.
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @return false|int
     *                   <p>Return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found (or if haystack / needle is empty).</p>
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if (\in_array('', [$haystack, $needle], true)) {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strripos($haystack, $needle, $offset, 'CP850')
            : \strripos($haystack, $needle, $offset);
    }
9608
9609
    /**
     * Case-sensitive lookup of the position of the last occurrence of a substring.
     *
     * Backends are tried in this order: mbstring, native byte functions (for CP850 / ASCII),
     * intl (grapheme), native byte functions (for pure ASCII input) and finally a manual
     * byte search whose result is converted back into a character position.
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack   <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                               into the string. Negative values will stop searching at an arbitrary point prior to
     *                               the end of the string.
     *                               </p>
     * @param string     $encoding   [optional] <p>Set the charset.</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If haystack or needle is empty, or needle is not found, it returns false.</p>
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ($needle === (int) $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // the two most common charsets are already in canonical form
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrpos($haystack, $needle, $offset)
                : \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strrpos($haystack, $needle, $offset);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        $intl_usable = self::$SUPPORT['intl'] === true
                       &&
                       $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
                       &&
                       $offset >= 0; // grapheme_strrpos() can't handle negative offset

        if ($intl_usable) {
            $grapheme_pos = \grapheme_strrpos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
            // a "false" result is not trusted here: the remaining fallbacks get a chance too
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Apply the offset by cutting the haystack first, so that the byte search
        // below always runs over the complete (remaining) string.
        if ($offset !== 0) {
            if ($offset > 0) {
                // skip the leading characters; $offset is added back to the result
                $sliced = self::substr($haystack, $offset);
            } else {
                // drop the trailing characters; positions stay relative to the start
                $sliced = self::substr($haystack, 0, $offset);
                $offset = 0;
            }

            if ($sliced !== null) {
                $haystack = $sliced === false ? '' : (string) $sliced;
            }
        }

        $byte_pos = \strrpos($haystack, $needle);
        if ($byte_pos === false) {
            return false;
        }

        // Everything in front of the match, counted in characters, is the position.
        /** @var string|false $prefix - needed for PhpStan (stubs error) */
        $prefix = \substr($haystack, 0, $byte_pos);
        if ($prefix === false) {
            return false;
        }

        return $offset + (int) self::strlen($prefix);
    }
9747
9748
    /**
     * Byte-wise lookup of the position of the last occurrence of a substring.
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *                         </p>
     *
     * @return false|int
     *                   <p>The numeric position of the last occurrence of needle in the
     *                   haystack string. If needle is not found (or if haystack / needle is empty), it returns false.</p>
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if (\in_array('', [$haystack, $needle], true)) {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
            : \strrpos($haystack, $needle, $offset);
    }
9780
9781
    /**
     * Measures the leading run of characters in a string that all belong to a given mask.
     *
     * If an offset and / or a length is given, only that part of the string is inspected.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $mask     <p>The mask of chars</p>
     * @param int    $offset   [optional] <p>Start of the inspected part (in characters).</p>
     * @param int    $length   [optional] <p>Length of the inspected part (in characters).</p>
     * @param string $encoding [optional] <p>Set the charset.</p>
     *
     * @return false|int
     *                   <p>Length of the initial segment made up of mask characters;
     *                   0 if the (sliced) string or the mask is empty, or if nothing matches.</p>
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        // the two most common charsets are already in canonical form
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // cut out the requested part first
        if ($offset !== 0 || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $str = (string) \mb_substr($str, $offset, $length);
            } else {
                $str = (string) \mb_substr($str, $offset);
            }
        }

        if ($str === '' || $mask === '') {
            return 0;
        }

        // match the mask as a character class, anchored at the beginning of the string
        $matches = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
            return 0;
        }

        return (int) self::strlen($matches[0], $encoding);
    }
9824
9825
    /**
     * Returns the part of haystack that starts at the first occurrence of needle
     * (or, on request, the part in front of it).
     *
     * Backends are tried in this order: mbstring, native byte functions (for CP850 / ASCII),
     * intl (grapheme), native byte functions (for pure ASCII input) and finally a regex.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, strstr() returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      A sub-string,<br>or <strong>false</strong> if needle is not found
     *                      (or if haystack / needle is empty)
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        $clean_utf8 = false
    ) {
        if (\in_array('', [$haystack, $needle], true)) {
            return false;
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // the two most common charsets are already in canonical form
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strstr($haystack, $needle, $before_needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strstr()" can't handle other encodings
        if (self::$SUPPORT['intl'] === true && $encoding === 'UTF-8') {
            $grapheme_part = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($grapheme_part !== false) {
                return $grapheme_part;
            }
            // a "false" result is not trusted here: the remaining fallbacks get a chance too
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // lazily capture everything in front of the first occurrence of the needle
        $pattern = '/^(.*?)' . \preg_quote($needle, '/') . '/us';
        \preg_match($pattern, $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        $prefix = $match[1];

        return $before_needle
            ? $prefix
            : self::substr($haystack, (int) self::strlen($prefix));
    }
9933
9934
    /**
     * Byte-wise lookup of the first occurrence of a string within another.
     *
     * @param string $haystack      <p>
     *                              The string from which to get the first occurrence
     *                              of needle.
     *                              </p>
     * @param string $needle        <p>
     *                              The string to find in haystack.
     *                              </p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the first occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the first occurrence of needle to the end.
     *                              </p>
     *
     * @return false|string
     *                      <p>The portion of haystack,
     *                      or false if needle is not found (or if haystack / needle is empty).</p>
     */
    public static function strstr_in_byte(
        string $haystack,
        string $needle,
        bool $before_needle = false
    ) {
        if (\in_array('', [$haystack, $needle], true)) {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
            : \strstr($haystack, $needle, $before_needle);
    }
9973
9974
    /**
     * Unicode transformation for case-less matching.
     *
     * The string is first passed through the internal case helper and then converted
     * to lowercase or uppercase, depending on $lower.
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str        <p>The input string.</p>
     * @param bool        $full       [optional] <p>
     *                                <b>true</b>, replace full case folding chars (default)<br>
     *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                                </p>
     * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding   [optional] <p>Set the charset.</p>
     * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower      [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                                is for some languages better ...</p>
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $clean_utf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            // strip invalid UTF-8 sequences before any case conversion takes place
            $str = self::clean($str);
        }

        $str = self::fixStrCaseHelper($str, $lower, $full);

        // without a language and with UTF-8 the "mb_" functions can be called directly
        $plain_utf8 = $lang === null && $encoding === 'UTF-8';

        if ($lower === true) {
            return $plain_utf8
                ? \mb_strtolower($str)
                : self::strtolower($str, $encoding, false, $lang);
        }

        return $plain_utf8
            ? \mb_strtoupper($str)
            : self::strtoupper($str, $encoding, false, $lang);
    }
10026
10027
    /**
     * Make a string lowercase.
     *
     * With a "lang" value the conversion is done by an intl transliterator
     * ("<lang>-Lower", or "Any-Lower" if that one is unknown); in every other
     * case mb_strtolower() is used.
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                           <p>The string being lowercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            // strip invalid UTF-8 sequences before the case conversion takes place
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length === true) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // fast path: no language and UTF-8
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang === null) {
            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        if (self::$SUPPORT['intl'] !== true) {
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        // the list of available transliterators is loaded only once
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $language_code = $lang . '-Lower';
        if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

            // unknown language: use the generic transliterator instead
            $language_code = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        return (string) \transliterator_transliterate($language_code, $str);
    }
10096
10097
    /**
     * Make a string uppercase.
     *
     * With a "lang" value the conversion is done by an intl transliterator
     * ("<lang>-Upper", or "Any-Upper" if that one is unknown); in every other
     * case mb_strtoupper() is used.
     *
     * An E_USER_WARNING is triggered if "lang" is given but intl is not available,
     * or if intl has no transliterator for the given language.
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                           <p>The string being uppercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            // strip invalid UTF-8 sequences before the case conversion takes place
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length === true) {
            $str = self::fixStrCaseHelper($str, false);
        }

        // fast path: no language and UTF-8
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // the list of available transliterators is loaded only once
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Upper';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl is loaded at this point, the language itself is the problem
                    // (same wording as in UTF8::strtolower())
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    // unknown language: use the generic transliterator instead
                    $language_code = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($language_code, $str);
            }

            // report the correct method name, so that the warning can be traced back to this call
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
10166
10167
    /**
     * Translate characters or replace sub-strings.
     *
     * - "to" is not empty: "from" and "to" are split into characters, the longer list is
     *   cut down to the length of the shorter one and every character of "from" is
     *   replaced by the character of "to" at the same position.
     * - "to" is empty and "from" is a string: every occurrence of "from" is removed.
     * - "to" is empty and "from" is an array: "from" is used as replace-pairs for \strtr().
     *
     * @see http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from.</p>
     * @param string|string[] $to   [optional] <p>The string being translated to.</p>
     *
     * @throws \InvalidArgumentException if "from" and "to" can not be combined into replace-pairs
     *
     * @return string
     *                <p>This function returns a copy of str, translating all occurrences of each character in "from" to the
     *                corresponding character in "to".</p>
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // nothing would change
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            $from = self::str_split($from);
            $to = self::str_split($to);

            // both lists need the same length: cut down the longer one
            $excess = \count($from) - \count($to);
            if ($excess > 0) {
                $from = \array_slice($from, 0, \count($to));
            } elseif ($excess < 0) {
                $to = \array_slice($to, 0, \count($from));
            }

            $from = \array_combine($from, $to);
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            if ($from === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from, true) . ' | to: ' . \print_r($to, true) . ')');
            }
        }

        // a string can only be left over here if "to" was empty: remove it
        return \is_string($from)
            ? \str_replace($from, '', $str)
            : \strtr($str, $from);
    }
10215
10216
    /**
     * Return the width of a string.
     *
     * mb_strwidth() is used if mbstring is available. Otherwise every character from
     * a fixed list of wide ranges counts as two columns and every other character as one.
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return int
     */
    public static function strwidth(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): int {
        if ($str === '') {
            return 0;
        }

        // the two most common charsets are already in canonical form
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // the regex below needs UTF-8 input
        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // remove all wide characters and count them on the way
        $wide_count = 0;
        $narrow_str = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wide_count);

        // wide characters take two columns, the rest takes one column each
        return ($wide_count * 2) + (int) self::strlen($narrow_str, 'UTF-8');
    }
10269
10270
    /**
10271
     * Get part of a string.
10272
     *
10273
     * @see http://php.net/manual/en/function.mb-substr.php
10274
     *
10275
     * @param string $str        <p>The string being checked.</p>
10276
     * @param int    $offset     <p>The first position used in str.</p>
10277
     * @param int    $length     [optional] <p>The maximum length of the returned string.</p>
10278
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
10279
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10280
     *
10281
     * @return false|string
10282
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
10283
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
10284
     *                      characters long, <b>FALSE</b> will be returned.
10285
     */
10286 172
    public static function substr(
10287
        string $str,
10288
        int $offset = 0,
10289
        int $length = null,
10290
        string $encoding = 'UTF-8',
10291
        bool $clean_utf8 = false
10292
    ) {
10293
        // empty string
10294 172
        if ($str === '' || $length === 0) {
10295 8
            return '';
10296
        }
10297
10298 168
        if ($clean_utf8 === true) {
10299
            // iconv and mbstring are not tolerant to invalid encoding
10300
            // further, their behaviour is inconsistent with that of PHP's substr
10301 2
            $str = self::clean($str);
10302
        }
10303
10304
        // whole string
10305 168
        if (!$offset && $length === null) {
10306 7
            return $str;
10307
        }
10308
10309 163
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
10310 19
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
10311
        }
10312
10313
        //
10314
        // fallback via mbstring
10315
        //
10316
10317 163
        if (self::$SUPPORT['mbstring'] === true) {
10318 161
            if ($encoding === 'UTF-8') {
10319 161
                if ($length === null) {
10320 64
                    return \mb_substr($str, $offset);
10321
                }
10322
10323 102
                return \mb_substr($str, $offset, $length);
10324
            }
10325
10326
            return self::substr($str, $offset, $length, $encoding);
10327
        }
10328
10329
        //
10330
        // fallback for binary || ascii only
10331
        //
10332
10333
        if (
10334 4
            $encoding === 'CP850'
10335
            ||
10336 4
            $encoding === 'ASCII'
10337
        ) {
10338
            if ($length === null) {
10339
                return \substr($str, $offset);
10340
            }
10341
10342
            return \substr($str, $offset, $length);
10343
        }
10344
10345
        // otherwise we need the string-length
10346 4
        $str_length = 0;
10347 4
        if ($offset || $length === null) {
10348 4
            $str_length = self::strlen($str, $encoding);
10349
        }
10350
10351
        // e.g.: invalid chars + mbstring not installed
10352 4
        if ($str_length === false) {
10353
            return false;
10354
        }
10355
10356
        // empty string
10357 4
        if ($offset === $str_length && !$length) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $length of type integer|null is loosely compared to false; this is ambiguous if the integer can be 0. You might want to explicitly use === null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
10358
            return '';
10359
        }
10360
10361
        // impossible
10362 4
        if ($offset && $offset > $str_length) {
10363
            return '';
10364
        }
10365
10366 4
        if ($length === null) {
10367 4
            $length = (int) $str_length;
10368
        } else {
10369 2
            $length = (int) $length;
10370
        }
10371
10372
        if (
10373 4
            $encoding !== 'UTF-8'
10374
            &&
10375 4
            self::$SUPPORT['mbstring'] === false
10376
        ) {
10377 2
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
10378
        }
10379
10380
        //
10381
        // fallback via intl
10382
        //
10383
10384
        if (
10385 4
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
10386
            &&
10387 4
            $offset >= 0 // grapheme_substr() can't handle negative offset
10388
            &&
10389 4
            self::$SUPPORT['intl'] === true
10390
        ) {
10391
            $return_tmp = \grapheme_substr($str, $offset, $length);
10392
            if ($return_tmp !== false) {
10393
                return $return_tmp;
10394
            }
10395
        }
10396
10397
        //
10398
        // fallback via iconv
10399
        //
10400
10401
        if (
10402 4
            $length >= 0 // "iconv_substr()" can't handle negative length
10403
            &&
10404 4
            self::$SUPPORT['iconv'] === true
10405
        ) {
10406
            $return_tmp = \iconv_substr($str, $offset, $length);
10407
            if ($return_tmp !== false) {
10408
                return $return_tmp;
10409
            }
10410
        }
10411
10412
        //
10413
        // fallback for ascii only
10414
        //
10415
10416 4
        if (ASCII::is_ascii($str)) {
10417
            return \substr($str, $offset, $length);
10418
        }
10419
10420
        //
10421
        // fallback via vanilla php
10422
        //
10423
10424
        // split to array, and remove invalid characters
10425 4
        $array = self::str_split($str);
10426
10427
        // extract relevant part, and join to make sting again
10428 4
        return \implode('', \array_slice($array, $offset, $length));
10429
    }
10430
10431
    /**
     * Compare a slice of one string against the beginning of another string.
     *
     * When an offset and/or a length is given, $str1 is first reduced to that slice and
     * $str2 is cut to the same number of characters (as reported by "UTF8::strlen()")
     * before both are handed to "UTF8::strcmp()" or "UTF8::strcasecmp()".
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position inside $str1. A negative value is passed
     *                                     through to the underlying substring function.</p>
     * @param int|null $length             [optional] <p>The number of characters to compare. With null, everything
     *                                     from the offset to the end of $str1 is used.</p>
     * @param bool     $case_insensitivity [optional] <p>If TRUE, the comparison is done via
     *                                     "UTF8::strcasecmp()".</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        $needs_slicing = ($offset !== 0 || $length !== null);

        if ($needs_slicing && $encoding === 'UTF-8') {
            // fast path: plain "mb_substr()" calls without an explicit encoding
            $str1 = (string) (
                $length === null
                    ? \mb_substr($str1, $offset)
                    : \mb_substr($str1, $offset, $length)
            );
            $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
        } elseif ($needs_slicing) {
            // every other encoding goes through the library's own substr()
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str1 = (string) self::substr($str1, $offset, $length, $encoding);
            $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1), $encoding);
        }

        return $case_insensitivity === true
            ? self::strcasecmp($str1, $str2, $encoding)
            : self::strcmp($str1, $str2);
    }
10484
10485
    /**
     * Count the number of substring occurrences.
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack   <p>The string to search in.</p>
     * @param string   $needle     <p>The substring to search for.</p>
     * @param int      $offset     [optional] <p>The offset where to start counting.</p>
     * @param int|null $length     [optional] <p>
     *                             The maximum length after the specified offset to search for the
     *                             substring. A value of 0 always yields 0 matches; any other non-null
     *                             value (including a negative one) is handed to "mb_substr()" to cut
     *                             the haystack before counting.
     *                             </p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   <p>The number of matches, or false if $haystack or $needle is empty or the
     *                   length of the haystack cannot be determined.</p>
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // "$length === 0" already returned above, so every remaining non-null length restricts
        // the haystack. The former check ("$length > 0") silently dropped a negative length
        // whenever the offset was 0, although it was honoured for any other offset.
        if ($offset !== 0 || $length !== null) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack, $encoding);
                if ($length_tmp === false) {
                    return false;
                }
                $length = (int) $length_tmp;
            }

            if ($encoding === 'UTF-8') {
                $haystack = (string) \mb_substr($haystack, $offset, $length);
            } else {
                $haystack = (string) \mb_substr($haystack, $offset, $length, $encoding);
            }
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_substr_count($haystack, $needle);
            }

            return \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback without mbstring: count the literal (quoted) needle via PCRE in UTF-8 mode
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
10566
10567
    /**
     * Count the number of substring occurrences, working on bytes instead of characters.
     *
     * Without "mbstring.func_overload" the call is forwarded directly to the native
     * "substr_count()". With the overload active, the haystack is cut first and then
     * counted via "mb_substr_count()" using the 8-bit "CP850" encoding.
     *
     * @param string   $haystack <p>
     *                           The string being checked.
     *                           </p>
     * @param string   $needle   <p>
     *                           The string being found.
     *                           </p>
     * @param int      $offset   [optional] <p>
     *                           The offset where to start counting
     *                           </p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring.
     *                           </p>
     *
     * @return false|int
     *                   <p>The number of times the needle occurs in the haystack; 0 if one of both
     *                   strings is empty.</p>
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        $is_overloaded = self::$SUPPORT['mbstring_func_overload'] === true;

        if (!$is_overloaded) {
            // the native function is byte-based already
            return $length === null
                ? \substr_count($haystack, $needle, $offset)
                : \substr_count($haystack, $needle, $offset, $length);
        }

        if ($offset || $length !== null) {
            if ($length === null) {
                $haystack_length = self::strlen($haystack);
                if ($haystack_length === false) {
                    return false;
                }
                $length = (int) $haystack_length;
            }

            $is_empty_range = $length !== 0
                              && $offset !== 0
                              && ($length + $offset) <= 0;
            // output from "substr_count()" have changed in PHP 7.1
            if ($is_empty_range && Bootup::is_php('7.1') === false) {
                return false;
            }

            /** @var string|false $haystack_part - needed for PhpStan (stubs error) */
            $haystack_part = \substr($haystack, $offset, $length);
            $haystack = $haystack_part === false ? '' : (string) $haystack_part;
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
    }
10645
10646
    /**
     * Count how often $substring occurs in $str.
     *
     * The search is case-sensitive by default. For a case-insensitive search both strings are
     * converted first: via "mb_strtoupper()" for "UTF-8", via "UTF8::strtocasefold()" for any
     * other encoding.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $substring      <p>The substring to search for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <p>The number of matches; 0 if $str or $substring is empty.</p>
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $case_sensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($str === '' || $substring === '') {
            return 0;
        }

        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($case_sensitive) {
            $count = $is_utf8
                ? \mb_substr_count($str, $substring)
                : \mb_substr_count($str, $substring, $encoding);

            return (int) $count;
        }

        if ($is_utf8) {
            $str_upper = \mb_strtoupper($str);
            $substring_upper = \mb_strtoupper($substring);

            return (int) \mb_substr_count($str_upper, $substring_upper);
        }

        $str_folded = self::strtocasefold($str, true, false, $encoding, null, false);
        $substring_folded = self::strtocasefold($substring, true, false, $encoding, null, false);

        return (int) \mb_substr_count($str_folded, $substring_folded, $encoding);
    }
10691
10692
    /**
     * Strip $needle from the start of $haystack, ignoring the case while matching.
     *
     * The match itself is delegated to "UTF8::str_istarts_with()".
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if there is no match.</p>
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // nothing to search in or nothing to search for
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_istarts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
10716
10717
    /**
     * Get a part of a string, addressed in bytes.
     *
     * With "mbstring.func_overload" active, "mb_substr()" with the 8-bit "CP850" encoding is
     * used; otherwise the native "substr()" does the work.
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first position used in str.</p>
     * @param int|null $length [optional] <p>The maximum length of the returned string.</p>
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters; whatever the underlying substring function
     *                      returns is passed through.</p>
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty input or an explicit zero-length request
        if ($length === 0 || $str === '') {
            return '';
        }

        // no restriction at all: hand back the whole string
        if ($length === null && $offset === 0) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            // "no length" is expressed as a very large length for the native function
            $byte_length = $length === null ? 2147483647 : $length;

            return \substr($str, $offset, $byte_length);
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
    }
10748
10749
    /**
     * Strip $needle from the end of $haystack, ignoring the case while matching.
     *
     * The match itself is delegated to "UTF8::str_iends_with()".
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if there is no match.</p>
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // nothing to search in or nothing to search for
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_iends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $haystack_length = (int) self::strlen($haystack);
        $suffix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $haystack_length - $suffix_length);
    }
10773
10774
    /**
     * Strip $needle from the start of $haystack (case-sensitive).
     *
     * The match itself is delegated to "UTF8::str_starts_with()".
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if there is no match.</p>
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // nothing to search in or nothing to search for
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_starts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
10798
10799
    /**
     * Replace text within a portion of a string.
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * If $str is an array, every element is processed on its own (with the same $encoding) and
     * an array is returned; $replacement, $offset and $length may then be arrays as well, their
     * values are matched to the elements of $str by position.
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given, then it will default to strlen(
     *                                     string ); i.e. end the replacing at the end of string. Of course, if
     *                                     length is zero then this function will have the effect of inserting
     *                                     replacement into string at the given start offset.</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \InvalidArgumentException if $offset or $length is an array while $str is not
     *
     * @return string|string[] The result string is returned. If string is an array then array is returned.
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str) === true) {
            $num = \count($str);

            // the replacement
            if (\is_array($replacement) === true) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset
            if (\is_array($offset) === true) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$value_tmp) {
                    $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
                }
                unset($value_tmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length
            if ($length === null) {
                // keep "null" so that every element is replaced up to its end, exactly like
                // the single-string branch below does for a missing length
                $length = \array_fill(0, $num, null);
            } elseif (\is_array($length) === true) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$value_tmp_V2) {
                    $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
                }
                unset($value_tmp_V2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call; a closure is used so that $encoding reaches every element
            return \array_map(
                static function ($str_item, $replacement_item, $offset_item, $length_item) use ($encoding) {
                    return self::substr_replace(
                        $str_item,
                        $replacement_item,
                        $offset_item,
                        $length_item,
                        $encoding
                    );
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        if (\is_array($replacement) === true) {
            if (\count($replacement) > 0) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length) === true) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset) === true) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        // a missing offset (e.g. "null" padded in by "array_map()" for a too short offset-array)
        // must act as 0, otherwise "mb_substr($str, 0, null)" would return the whole string
        $offset = (int) $offset;

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, $string_length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // a negative length counts from the end, a missing / too large one means "up to the end"
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into single characters
        \preg_match_all('/./us', $str, $str_matches);
        \preg_match_all('/./us', $replacement, $replacement_matches);

        if ($length === null) {
            $length_tmp = self::strlen($str, $encoding);
            if ($length_tmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $length_tmp;
        }

        \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);

        return \implode('', $str_matches[0]);
    }
10949
10950
    /**
     * Strip $needle from the end of $haystack (case-sensitive).
     *
     * Whether the haystack ends with the needle is decided by a byte-wise comparison; only the
     * final cut is done with character-aware functions.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if there is no match.</p>
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to search in or nothing to search for
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // byte-wise suffix check
        $haystack_tail = \substr($haystack, -\strlen($needle));
        if ($haystack_tail !== $needle) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            $keep_length = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keep_length);
        }

        $keep_length = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $keep_length, $encoding);
    }
10991
10992
    /**
     * Returns a case swapped version of the string.
     *
     * The fast path combines the lower-cased, the upper-cased and the original string with a
     * byte-wise XOR. That only works while all three strings have the same byte length, so for
     * "UTF-8" input whose case mapping changes the byte length (e.g. "ſ" -> "S") the string is
     * swapped character by character instead.
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string each character's case swapped
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8') {
            return (string) (self::strtolower($str, $encoding) ^ self::strtoupper($str, $encoding) ^ $str);
        }

        $lower = \mb_strtolower($str);
        $upper = \mb_strtoupper($str);
        $byte_length = \strlen($str);

        // XOR of strings works byte by byte and stops at the shortest operand, so it is only
        // usable if neither case mapping changed the byte length
        if (\strlen($lower) === $byte_length && \strlen($upper) === $byte_length) {
            return (string) ($lower ^ $upper ^ $str);
        }

        $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);
        if ($chars === false) {
            // not splittable (e.g. invalid UTF-8): keep the former best-effort result
            return (string) ($lower ^ $upper ^ $str);
        }

        $swapped = '';
        foreach ($chars as $char) {
            $char_lower = \mb_strtolower($char);
            // an already lower-cased (or caseless) character is upper-cased, anything else is lower-cased
            $swapped .= $char_lower === $char ? \mb_strtoupper($char) : $char_lower;
        }

        return $swapped;
    }
11019
11020
    /**
     * Checks whether symfony-polyfills are used.
     *
     * A polyfill is assumed as soon as a function of "mbstring" or "iconv" exists although the
     * matching PHP extension is not loaded.
     *
     * @return bool
     *              <strong>true</strong> if in use, <strong>false</strong> otherwise
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstring_is_polyfilled = !\extension_loaded('mbstring') && \function_exists('mb_strlen');
        $iconv_is_polyfilled = !\extension_loaded('iconv') && \function_exists('iconv');

        return $mbstring_is_polyfilled || $iconv_is_polyfilled;
    }
11043
11044
    /**
     * Replace every tab character ("\t") with a fixed number of spaces.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length [optional] <p>The number of spaces that replace one tab. Default: 4</p>
     *
     * @return string
     *                <p>The string with all tabs replaced.</p>
     */
    public static function tabs_to_spaces(string $str, int $tab_length = 4): string
    {
        $indent = \str_repeat(' ', $tab_length);

        return \str_replace("\t", $indent, $str);
    }
11062
11063
    /**
     * Convert the string to title case.
     *
     * Without a language and without the "keep the string length" option the work is done by
     * "mb_convert_case()" with "MB_CASE_TITLE"; otherwise the call is handed over to
     * "UTF8::str_titleize()".
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>A string with all characters of $str being title-cased.</p>
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        $needs_titleize = $lang !== null || $try_to_keep_the_string_length !== false;
        if ($needs_titleize) {
            return self::str_titleize(
                $str,
                null,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length,
                false
            );
        }

        if ($encoding === 'UTF-8') {
            return \mb_convert_case($str, \MB_CASE_TITLE);
        }

        $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

        return \mb_convert_case($str, \MB_CASE_TITLE, $normalized_encoding);
    }
11109
11110
    /**
     * Deprecated camelCase alias: forwards all arguments to "UTF8::to_ascii()".
     *
     * @param string $str       <p>The input string.</p>
     * @param string $subst_chr [optional] <p>Passed on as the "unknown character" replacement. Default: ?</p>
     * @param bool   $strict    [optional] <p>Passed on unchanged as the "strict" flag.</p>
     *
     * @return string
     *
     * @see UTF8::to_ascii()
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
     */
    public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
11129
11130
    /**
     * Deprecated camelCase alias: forwards the input to "UTF8::to_iso8859()".
     *
     * @param string|string[] $str <p>The input string or an array of strings.</p>
     *
     * @return string|string[]
     *
     * @see UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function toIso8859($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
11144
11145
    /**
     * Deprecated camelCase alias: forwards the argument unchanged to "UTF8::to_iso8859()".
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[] the value returned by "UTF8::to_iso8859()"
     *
     * @see UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function toLatin1($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
11159
11160
    /**
     * Deprecated camelCase alias: forwards the argument to "UTF8::to_utf8()" (HTML entity decoding stays at
     * that method's default).
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[] the value returned by "UTF8::to_utf8()"
     *
     * @see UTF8::to_utf8()
     * @deprecated <p>please use "UTF8::to_utf8()"</p>
     */
    public static function toUTF8($str)
    {
        $converted = self::to_utf8($str);

        return $converted;
    }
11174
11175
    /**
     * Convert a string into ASCII by delegating to "ASCII::to_transliterate()".
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Replacement used for unknown characters. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @return string the value returned by "ASCII::to_transliterate()"
     */
    public static function to_ascii(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        $transliterated = ASCII::to_transliterate($str, $unknown, $strict);

        return $transliterated;
    }
11192
11193
    /**
     * Interpret a value as a boolean after casting it to string.
     *
     * Resolution order:
     * 1. the empty string is false
     * 2. "true", "1", "on", "yes" are true and "false", "0", "off", "no" are false
     *    (tried with the exact string first, then with its "strtolower()" form)
     * 3. any other numeric string is true only if it is greater than zero
     * 4. everything else is true unless "trim()" reduces it to "" or "0"
     *
     * Info: http://php.net/manual/en/filter.filters.validate.php
     *
     * @param mixed $str <p>The value to interpret, it is cast to string first.</p>
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        $value = (string) $str;
        if ($value === '') {
            return false;
        }

        $truthy_words = ['true', '1', 'on', 'yes'];
        $falsy_words = ['false', '0', 'off', 'no'];

        // exact spelling first, lowercase spelling second
        foreach ([$value, \strtolower($value)] as $candidate) {
            if (\in_array($candidate, $truthy_words, true)) {
                return true;
            }

            if (\in_array($candidate, $falsy_words, true)) {
                return false;
            }
        }

        if (\is_numeric($value)) {
            // zero and negative numbers count as false
            return (float) $value > 0;
        }

        // same outcome as a plain (bool) cast of the trimmed string
        return !\in_array(\trim($value), ['', '0'], true);
    }
11234
11235
    /**
     * Convert given string to safe filename (and keep string case) by delegating to "ASCII::to_filename()".
     *
     * @param string $str               <p>The input string.</p>
     * @param bool   $use_transliterate <p>No transliteration, conversion etc. is done by default - unsafe
     *                                  characters are simply replaced with hyphen.</p>
     * @param string $fallback_char     <p>Forwarded as third argument (default is -).</p>
     *
     * @return string the value returned by "ASCII::to_filename()"
     */
    public static function to_filename(
        string $str,
        bool $use_transliterate = false,
        string $fallback_char = '-'
    ): string {
        $filename = ASCII::to_filename($str, $use_transliterate, $fallback_char);

        return $filename;
    }
11256
11257
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * Arrays are converted element by element (recursively, keys are kept); everything else is cast to
     * string and passed to "UTF8::utf8_decode()".
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str) === true) {
            // array_map() with a single array keeps the original keys
            return \array_map(
                static function ($item) {
                    return self::to_iso8859($item);
                },
                $str
            );
        }

        $input = (string) $str;

        return $input === '' ? '' : self::utf8_decode($input);
    }
11281
11282
    /**
     * Deprecated alias: forwards the argument unchanged to "UTF8::to_iso8859()".
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[] the value returned by "UTF8::to_iso8859()"
     *
     * @see UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function to_latin1($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
11296
11297
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>A lead byte followed by the matching number of continuation bytes (2-, 3- or 4-byte form) is
     * copied unchanged.</li>
     * <li>Every other byte >= 0x80 is passed through "UTF8::to_utf8_convert_helper()"; the original
     * documentation states that such input is assumed to be WINDOWS-1252 or ISO-8859.</li>
     * <li>Unicode escape sequences ("\uXXXX", including surrogate pairs) are decoded afterwards.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for
     * this case.</li>
     * </ul>
     *
     * @param string|string[] $str                        <p>Any string or array.</p>
     * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @return string|string[] the UTF-8 encoded string
     */
    public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
    {
        if (\is_array($str) === true) {
            // array_map() with a single array keeps the original keys
            return \array_map(
                static function ($item) use ($decode_html_entity_to_utf8) {
                    return self::to_utf8($item, $decode_html_entity_to_utf8);
                },
                $str
            );
        }

        $str = (string) $str;
        if ($str === '') {
            return $str;
        }

        $length = \strlen($str);
        $buf = '';
        $i = 0;

        while ($i < $length) {
            $lead = $str[$i];
            $lead_ord = \ord($lead);

            // 7-bit byte: it doesn't need conversion
            if ($lead_ord < 0x80) {
                $buf .= $lead;
                ++$i;

                continue;
            }

            // how many bytes a UTF-8 sequence starting with this byte would have
            if ($lead_ord >= 0xC0 && $lead_ord <= 0xDF) {
                $sequence_length = 2;
            } elseif ($lead_ord >= 0xE0 && $lead_ord <= 0xEF) {
                $sequence_length = 3;
            } elseif ($lead_ord >= 0xF0 && $lead_ord <= 0xF7) {
                $sequence_length = 4;
            } else {
                // stray continuation byte (0x80-0xBF) or 0xF8-0xFF: never a valid lead byte
                $sequence_length = 0;
            }

            // all following bytes must exist and must be continuation bytes (10xxxxxx)
            $looks_like_utf8 = $sequence_length > 0 && $i + $sequence_length <= $length;
            for ($offset = 1; $looks_like_utf8 && $offset < $sequence_length; ++$offset) {
                $looks_like_utf8 = (\ord($str[$i + $offset]) & 0xC0) === 0x80;
            }

            if ($looks_like_utf8) {
                // yeah, almost sure it's UTF8 already
                $buf .= \substr($str, $i, $sequence_length);
                $i += $sequence_length;
            } else {
                // not valid UTF8 - convert only the current byte
                $buf .= self::to_utf8_convert_helper($lead);
                ++$i;
            }
        }

        // decode unicode escape sequences + unicode surrogate pairs
        $buf = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // group 3 only exists when the single "\uXXXX" alternative matched
                    $code_point = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair, see http://unicode.org/faq/utf_bom.html#utf16-4
                    $high_surrogate = (int) \hexdec($matches[1]);
                    $low_surrogate = (int) \hexdec($matches[2]);
                    $code_point = ($high_surrogate << 10)
                                  + $low_surrogate
                                  + 0x10000
                                  - (0xD800 << 10)
                                  - 0xDC00;
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($code_point < 0x80) {
                    return (string) self::chr($code_point);
                }

                if ($code_point < 0xA0) {
                    // 0x80-0x9F: build the two-byte form by hand
                    $first_byte = (string) self::chr(0xC0 | ($code_point >> 6));
                    $second_byte = (string) self::chr(0x80 | ($code_point & 0x3F));

                    return $first_byte . $second_byte;
                }

                return self::decimal_to_chr($code_point);
            },
            $buf
        );

        // preg_replace_callback() returns null on error
        if ($buf === null) {
            return '';
        }

        if ($decode_html_entity_to_utf8 === true) {
            $buf = self::html_entity_decode($buf);
        }

        return $buf;
    }
11430
11431
    /**
     * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
     *
     * INFO: This is slower than "trim()"
     *
     * The native function could only be used for 7-bit (ASCII) strings / chars, but checking for that
     * costs more time than it would save here.
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars [optional] <p>Optional characters to be stripped; null or an empty string
     *                           strips whitespace.</p>
     *
     * @return string the trimmed string
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // strict comparison on purpose: "0" is a valid character list and must not be treated as "not set"
        if ($chars !== null && $chars !== '') {
            $chars = \preg_quote($chars, '/');
            // plain concatenation instead of the "${var}" interpolation that is deprecated since PHP 8.2
            $pattern = '^[' . $chars . ']+|[' . $chars . ']+$';
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
11464
11465
    /**
     * Makes string's first char uppercase.
     *
     * The native "mb_strtoupper()" is used when neither a language nor the keep-length option is requested;
     * otherwise the first character is upper-cased via "UTF8::strtoupper()".
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        $native_case_conversion = $lang === null && $try_to_keep_the_string_length === false;

        if ($encoding === 'UTF-8') {
            // default encoding: the "mb_" functions are called without an explicit encoding
            $first_char = (string) \mb_substr($str, 0, 1);
            $remainder = (string) \mb_substr($str, 1);

            if ($native_case_conversion === true) {
                return \mb_strtoupper($first_char) . $remainder;
            }

            return self::strtoupper(
                $first_char,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            ) . $remainder;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $remainder = (string) self::substr($str, 1, null, $encoding);

        if ($native_case_conversion === true) {
            $first_char = (string) \mb_substr($str, 0, 1, $encoding);

            return \mb_strtoupper($first_char, $encoding) . $remainder;
        }

        $first_char = (string) self::substr($str, 0, 1, $encoding);

        return self::strtoupper(
            $first_char,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        ) . $remainder;
    }
11534
11535
    /**
     * Deprecated alias: forwards all arguments unchanged to "UTF8::ucfirst()".
     *
     * @param string $str        <p>Forwarded as first argument.</p>
     * @param string $encoding   <p>Forwarded as second argument.</p>
     * @param bool   $clean_utf8 <p>Forwarded as third argument.</p>
     *
     * @return string the value returned by "UTF8::ucfirst()"
     *
     * @see UTF8::ucfirst()
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
     */
    public static function ucword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        $result = self::ucfirst($str, $encoding, $clean_utf8);

        return $result;
    }
11554
11555
    /**
     * Uppercase for all words in the string.
     *
     * INFO: "mb_convert_case($str, MB_CASE_TITLE)" is not used here, because MB_CASE_TITLE does not only
     * uppercase the first letter, it also lowercases all other letters.
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Words that are left untouched.</p>
     * @param string   $char_list  [optional] <p>Additional chars that belong to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // strict comparison on purpose: the string "0" is not empty and must be returned as "0"
        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // the native function is only an option if neither extra word-chars nor exceptions are given
        // (compared strictly, so that e.g. a char-list of "0" is not mistaken for "nothing given")
        $use_php_default_functions = $char_list === '' && \implode('', $exceptions) === '';

        if (
            $use_php_default_functions === true
            &&
            ASCII::is_ascii($str) === true
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = \count($exceptions) > 0;

        foreach ($words as &$word) {
            if ($word === '') {
                continue;
            }

            if (
                $use_exceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $word = self::ucfirst($word, $encoding);
            }
        }
        // break the reference, so that a later write to $word cannot modify the last element
        unset($word);

        return \implode('', $words);
    }
11616
11617
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                    => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible (repeat until the string stops changing).</p>
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // without any of these markers there is nothing to url- / entity- / unicode-decode
        $has_encoded_parts = false;
        foreach (['&', '%', '+', '\u'] as $marker) {
            if (\strpos($str, $marker) !== false) {
                $has_encoded_parts = true;

                break;
            }
        }

        if ($has_encoded_parts === false) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        do {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $entities_decoded = self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            );

            $str = self::fix_simple_utf8(\urldecode($entities_decoded));
        } while ($multi_decode === true && $previous !== $str);

        return $str;
    }
11674
11675
    /**
11676
     * Returns an array that maps "urlencoded" win1252 sequences to their UTF-8 characters.
11677
     *
11678
     * @return string[]
11679
     *
11680
     * @deprecated <p>please use the "UTF8::urldecode()" function to decode a string</p>
11681
     */
11682 2
    public static function urldecode_fix_win1252_chars(): array
11683
    {
11684
        return [
11685 2
            '%20' => ' ',
11686
            '%21' => '!',
11687
            '%22' => '"',
11688
            '%23' => '#',
11689
            '%24' => '$',
11690
            '%25' => '%',
11691
            '%26' => '&',
11692
            '%27' => "'",
11693
            '%28' => '(',
11694
            '%29' => ')',
11695
            '%2A' => '*',
11696
            '%2B' => '+',
11697
            '%2C' => ',',
11698
            '%2D' => '-',
11699
            '%2E' => '.',
11700
            '%2F' => '/',
11701
            '%30' => '0',
11702
            '%31' => '1',
11703
            '%32' => '2',
11704
            '%33' => '3',
11705
            '%34' => '4',
11706
            '%35' => '5',
11707
            '%36' => '6',
11708
            '%37' => '7',
11709
            '%38' => '8',
11710
            '%39' => '9',
11711
            '%3A' => ':',
11712
            '%3B' => ';',
11713
            '%3C' => '<',
11714
            '%3D' => '=',
11715
            '%3E' => '>',
11716
            '%3F' => '?',
11717
            '%40' => '@',
11718
            '%41' => 'A',
11719
            '%42' => 'B',
11720
            '%43' => 'C',
11721
            '%44' => 'D',
11722
            '%45' => 'E',
11723
            '%46' => 'F',
11724
            '%47' => 'G',
11725
            '%48' => 'H',
11726
            '%49' => 'I',
11727
            '%4A' => 'J',
11728
            '%4B' => 'K',
11729
            '%4C' => 'L',
11730
            '%4D' => 'M',
11731
            '%4E' => 'N',
11732
            '%4F' => 'O',
11733
            '%50' => 'P',
11734
            '%51' => 'Q',
11735
            '%52' => 'R',
11736
            '%53' => 'S',
11737
            '%54' => 'T',
11738
            '%55' => 'U',
11739
            '%56' => 'V',
11740
            '%57' => 'W',
11741
            '%58' => 'X',
11742
            '%59' => 'Y',
11743
            '%5A' => 'Z',
11744
            '%5B' => '[',
11745
            '%5C' => '\\',
11746
            '%5D' => ']',
11747
            '%5E' => '^',
11748
            '%5F' => '_',
11749
            '%60' => '`',
11750
            '%61' => 'a',
11751
            '%62' => 'b',
11752
            '%63' => 'c',
11753
            '%64' => 'd',
11754
            '%65' => 'e',
11755
            '%66' => 'f',
11756
            '%67' => 'g',
11757
            '%68' => 'h',
11758
            '%69' => 'i',
11759
            '%6A' => 'j',
11760
            '%6B' => 'k',
11761
            '%6C' => 'l',
11762
            '%6D' => 'm',
11763
            '%6E' => 'n',
11764
            '%6F' => 'o',
11765
            '%70' => 'p',
11766
            '%71' => 'q',
11767
            '%72' => 'r',
11768
            '%73' => 's',
11769
            '%74' => 't',
11770
            '%75' => 'u',
11771
            '%76' => 'v',
11772
            '%77' => 'w',
11773
            '%78' => 'x',
11774
            '%79' => 'y',
11775
            '%7A' => 'z',
11776
            '%7B' => '{',
11777
            '%7C' => '|',
11778
            '%7D' => '}',
11779
            '%7E' => '~',
11780
            '%7F' => '',
11781
            '%80' => '`',
11782
            '%81' => '',
11783
            '%82' => '‚',
11784
            '%83' => 'ƒ',
11785
            '%84' => '„',
11786
            '%85' => '…',
11787
            '%86' => '†',
11788
            '%87' => '‡',
11789
            '%88' => 'ˆ',
11790
            '%89' => '‰',
11791
            '%8A' => 'Š',
11792
            '%8B' => '‹',
11793
            '%8C' => 'Œ',
11794
            '%8D' => '',
11795
            '%8E' => 'Ž',
11796
            '%8F' => '',
11797
            '%90' => '',
11798
            '%91' => '‘',
11799
            '%92' => '’',
11800
            '%93' => '“',
11801
            '%94' => '”',
11802
            '%95' => '•',
11803
            '%96' => '–',
11804
            '%97' => '—',
11805
            '%98' => '˜',
11806
            '%99' => '™',
11807
            '%9A' => 'š',
11808
            '%9B' => '›',
11809
            '%9C' => 'œ',
11810
            '%9D' => '',
11811
            '%9E' => 'ž',
11812
            '%9F' => 'Ÿ',
11813
            '%A0' => '',
11814
            '%A1' => '¡',
11815
            '%A2' => '¢',
11816
            '%A3' => '£',
11817
            '%A4' => '¤',
11818
            '%A5' => '¥',
11819
            '%A6' => '¦',
11820
            '%A7' => '§',
11821
            '%A8' => '¨',
11822
            '%A9' => '©',
11823
            '%AA' => 'ª',
11824
            '%AB' => '«',
11825
            '%AC' => '¬',
11826
            '%AD' => '',
11827
            '%AE' => '®',
11828
            '%AF' => '¯',
11829
            '%B0' => '°',
11830
            '%B1' => '±',
11831
            '%B2' => '²',
11832
            '%B3' => '³',
11833
            '%B4' => '´',
11834
            '%B5' => 'µ',
11835
            '%B6' => '¶',
11836
            '%B7' => '·',
11837
            '%B8' => '¸',
11838
            '%B9' => '¹',
11839
            '%BA' => 'º',
11840
            '%BB' => '»',
11841
            '%BC' => '¼',
11842
            '%BD' => '½',
11843
            '%BE' => '¾',
11844
            '%BF' => '¿',
11845
            '%C0' => 'À',
11846
            '%C1' => 'Á',
11847
            '%C2' => 'Â',
11848
            '%C3' => 'Ã',
11849
            '%C4' => 'Ä',
11850
            '%C5' => 'Å',
11851
            '%C6' => 'Æ',
11852
            '%C7' => 'Ç',
11853
            '%C8' => 'È',
11854
            '%C9' => 'É',
11855
            '%CA' => 'Ê',
11856
            '%CB' => 'Ë',
11857
            '%CC' => 'Ì',
11858
            '%CD' => 'Í',
11859
            '%CE' => 'Î',
11860
            '%CF' => 'Ï',
11861
            '%D0' => 'Ð',
11862
            '%D1' => 'Ñ',
11863
            '%D2' => 'Ò',
11864
            '%D3' => 'Ó',
11865
            '%D4' => 'Ô',
11866
            '%D5' => 'Õ',
11867
            '%D6' => 'Ö',
11868
            '%D7' => '×',
11869
            '%D8' => 'Ø',
11870
            '%D9' => 'Ù',
11871
            '%DA' => 'Ú',
11872
            '%DB' => 'Û',
11873
            '%DC' => 'Ü',
11874
            '%DD' => 'Ý',
11875
            '%DE' => 'Þ',
11876
            '%DF' => 'ß',
11877
            '%E0' => 'à',
11878
            '%E1' => 'á',
11879
            '%E2' => 'â',
11880
            '%E3' => 'ã',
11881
            '%E4' => 'ä',
11882
            '%E5' => 'å',
11883
            '%E6' => 'æ',
11884
            '%E7' => 'ç',
11885
            '%E8' => 'è',
11886
            '%E9' => 'é',
11887
            '%EA' => 'ê',
11888
            '%EB' => 'ë',
11889
            '%EC' => 'ì',
11890
            '%ED' => 'í',
11891
            '%EE' => 'î',
11892
            '%EF' => 'ï',
11893
            '%F0' => 'ð',
11894
            '%F1' => 'ñ',
11895
            '%F2' => 'ò',
11896
            '%F3' => 'ó',
11897
            '%F4' => 'ô',
11898
            '%F5' => 'õ',
11899
            '%F6' => 'ö',
11900
            '%F7' => '÷',
11901
            '%F8' => 'ø',
11902
            '%F9' => 'ù',
11903
            '%FA' => 'ú',
11904
            '%FB' => 'û',
11905
            '%FC' => 'ü',
11906
            '%FD' => 'ý',
11907
            '%FE' => 'þ',
11908
            '%FF' => 'ÿ',
11909
        ];
11910
    }
11911
11912
    /**
     * Decodes a UTF-8 string to ISO-8859-1.
     *
     * The string is rewritten in place: the read position ($i) is always >= the write position ($j), so no
     * unread byte is overwritten. Code points above 255, 3- and 4-byte sequences and truncated 2-byte
     * sequences are replaced with "?".
     *
     * @param string $str             <p>The input string.</p>
     * @param bool   $keep_utf8_chars <p>If true, the untouched input is returned whenever the decoded result
     *                                is not shorter (measured with "UTF8::strlen()") than the input.</p>
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // save for later comparison
        $str_backup = $str;
        $len = \strlen($str);

        // lazy-load the lookup tables
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $no_char_found = '?';
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            // dispatch on the high nibble of the current byte
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    // 2-byte lead at the very end of the input: there is no continuation byte to read,
                    // so do not index past the end of the string
                    if ($i + 1 >= $len) {
                        $str[$j] = $no_char_found;

                        break;
                    }

                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $no_char_found;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // 4-byte sequence: skip one byte more than the 3-byte case below
                    ++$i;

                // no break

                case "\xE0":
                    // 3-byte (or 4-byte) sequence: not representable in ISO-8859-1
                    $str[$j] = $no_char_found;
                    $i += 2;

                    break;

                default:
                    $str[$j] = $str[$i];
            }
        }

        /** @var string|false $return - needed for PhpStan (stubs error) */
        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keep_utf8_chars === true
            &&
            (int) self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
11982
11983
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * Uses "mb_convert_encoding()" because the native "utf8_encode()" is deprecated since PHP 8.2; both
     * perform the same ISO-8859-1 -> UTF-8 conversion.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the UTF-8 string, or an empty string if the conversion failed
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /** @var string|false $encoded - the conversion (or a polyfill for it) may return false */
        $encoded = \mb_convert_encoding($str, 'UTF-8', 'ISO-8859-1');

        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        if ($encoded === false) {
            return '';
        }

        return $encoded;
    }
12007
12008
    /**
     * Deprecated alias: forwards the argument unchanged to "UTF8::fix_simple_utf8()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the value returned by "UTF8::fix_simple_utf8()"
     *
     * @deprecated <p>please use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
12021
12022
    /**
     * Returns the class-level whitespace table ("self::$WHITESPACE_TABLE").
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @return string[]
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
     *                  as defined in above URL
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
12035
12036
    /**
     * Cuts a string down to at most "$limit" words.
     *
     * A "word" is a run of non-whitespace characters. If the string had to be
     * shortened, trailing whitespace is trimmed and "$str_add_on" is appended;
     * otherwise the input is returned untouched.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $limit      <p>The limit of words as integer.</p>
     * @param string $str_add_on <p>Replacement for the stripped part of the string.</p>
     *
     * @return string
     *                <p>An empty string if the input is empty or the limit is lower than 1.</p>
     */
    public static function words_limit(
        string $str,
        int $limit = 100,
        string $str_add_on = '…'
    ): string {
        if ($limit < 1 || $str === '') {
            return '';
        }

        // leading whitespace, followed by 1..$limit "word + trailing whitespace" groups
        $pattern = '/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u';
        \preg_match($pattern, $str, $matches);

        if (isset($matches[0])) {
            $head = $matches[0];

            // the match is anchored at the start, so a shorter match means something was cut off
            if ((int) \mb_strlen($head) !== \mb_strlen($str)) {
                return \rtrim($head) . $str_add_on;
            }
        }

        return $str;
    }
12066
12067
    /**
     * Wraps a string to a given number of characters
     *
     * The work is delegated to the native "wordwrap()", which is run on an
     * ASCII-only mask of the input (one mask character per input character),
     * so the width is applied per character and not per byte.
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @return string
     *                <p>The given string wrapped at the specified column
     *                (an empty string if "$str" or "$break" is empty).</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($break === '' || $str === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }

        // $chars: the real characters (an existing break counts as one entry)
        // $mask:  same length, but ASCII only: '#' = break, ' ' = space, '?' = anything else
        $chars = [];
        $mask = '';
        foreach ($segments as $segment_index => $segment) {
            if ($segment_index) {
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        // let the native function decide where the breaks belong
        $mask = \wordwrap($mask, $width, '#', $cut);
        $mask_length = \mb_strlen($mask);

        $wrapped = '';
        $char_index = 0;
        $mask_index = -1;
        $marker_pos = -1;

        while (true) {
            $marker_pos = \mb_strpos($mask, '#', $marker_pos + 1);
            if ($marker_pos === false) {
                break;
            }

            // copy everything in front of the next break marker
            for (++$mask_index; $mask_index < $marker_pos; ++$mask_index) {
                $wrapped .= $chars[$char_index];
                unset($chars[$char_index++]);

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($mask_index > $mask_length) {
                    break 2;
                }
            }

            // the marker replaced an existing break or a space: drop that character
            if (
                $break === $chars[$char_index]
                ||
                $chars[$char_index] === ' '
            ) {
                unset($chars[$char_index++]);
            }

            $wrapped .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($marker_pos > $mask_length) {
                break;
            }
        }

        // whatever is left belongs to the last line
        return $wrapped . \implode('', $chars);
    }
12153
12154
    /**
     * Line-Wrap the string after $limit, but split the string by "$delimiter" before ...
     *    ... so that we wrap the per line.
     *
     * @param string      $str             <p>The input string.</p>
     * @param int         $width           [optional] <p>The column width.</p>
     * @param string      $break           [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool        $cut             [optional] <p>
     *                                     If the cut is set to true, the string is
     *                                     always wrapped at or before the specified width. So if you have
     *                                     a word that is larger than the given width, it is broken apart.
     *                                     </p>
     * @param bool        $add_final_break [optional] <p>
     *                                     If this flag is true, then the method will add a $break at the end
     *                                     of the result string.
     *                                     </p>
     * @param string|null $delimiter       [optional] <p>
     *                                     You can change the default behavior, where we split the string by newline.
     *                                     An empty delimiter cannot be used for splitting; in that case nothing
     *                                     is wrapped and only the optional final break is returned.
     *                                     </p>
     *
     * @return string
     *                <p>The wrapped parts, joined again by the delimiter (or "\n" if none was given).</p>
     */
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $add_final_break = true,
        string $delimiter = null
    ): string {
        if ($delimiter === null) {
            $strings = \preg_split('/\\r\\n|\\r|\\n/', $str);
        } elseif ($delimiter === '') {
            // explode() rejects an empty delimiter: PHP 7 warns and returns false,
            // PHP 8 throws a ValueError. Take the "false" path explicitly so the
            // result is the same on every PHP version.
            $strings = false;
        } else {
            $strings = \explode($delimiter, $str);
        }

        $string_helper_array = [];
        if ($strings !== false) {
            foreach ($strings as $value) {
                $string_helper_array[] = self::wordwrap($value, $width, $break, $cut);
            }
        }

        $final_break = $add_final_break ? $break : '';

        return \implode($delimiter ?? "\n", $string_helper_array) . $final_break;
    }
12205
12206
    /**
     * Returns the list of Unicode White Space characters kept by this class.
     *
     * @return string[] an array with numeric code point as key and White Space Character as value
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
12215
12216
    /**
     * Decodes decimal numeric HTML entities ("&#NN;" with 2 to 6 digits) into "$encoding".
     *
     * Entities that decode to a single or double quote are left encoded.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding <p>The target encoding passed to "mb_convert_encoding()".</p>
     *
     * @return string
     */
    private static function html_entity_decode_helper(string $str, string $encoding): string
    {
        /**
         * @param string[] $matches
         *
         * @return string
         */
        $decode_entity = static function (array $matches) use ($encoding): string {
            $entity = $matches[0];
            $decoded = \mb_convert_encoding($entity, $encoding, 'HTML-ENTITIES');

            // keep quotes encoded
            if ($decoded === '"' || $decoded === "'") {
                return $entity;
            }

            return $decoded;
        };

        $result = \preg_replace_callback("/&#\d{2,6};/", $decode_entity, $str);

        return (string) $result;
    }
12242
12243
    /**
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
     *
     * The string is walked octet by octet. Overlong ("non-shortest") forms,
     * surrogates, code points above U+10FFFF, 5- and 6-octet sequences, stray
     * continuation octets and sequences that are cut off - in the middle of
     * the string or at its very end - are all rejected.
     *
     * @see http://hsivonen.iki.fi/php-utf8/
     *
     * @param string $str    <p>The string to be checked.</p>
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     *              <p>true for an empty string and for well-formed UTF-8, false otherwise.</p>
     */
    private static function is_utf8_string(string $str, bool $strict = false): bool
    {
        if ($str === '') {
            return true;
        }

        if ($strict === true) {
            $is_binary = self::is_binary($str, true);

            if ($is_binary && self::is_utf16($str, false) !== false) {
                return false;
            }

            if ($is_binary && self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        // NOTE(review): this shortcut is taken when "pcre_utf8_support()" is NOT true,
        // yet it relies on the /u modifier - the condition looks inverted. Left as-is
        // because the helper is defined outside of this block; please confirm.
        if (self::pcre_utf8_support() !== true) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^./us', $str, $ar) === 1;
        }

        $mState = 0; // cached expected number of octets after the current octet
        // until the beginning of the next UTF8 character sequence
        $mUcs4 = 0; // cached Unicode character
        $mBytes = 1; // cached expected number of octets in the current sequence

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $len = \strlen($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $len; ++$i) {
            $in = self::$ORD[$str[$i]];

            if ($mState === 0) {
                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if ((0x80 & $in) === 0) {
                    // US-ASCII, pass straight through.
                    $mBytes = 1;
                } elseif ((0xE0 & $in) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;
                } elseif ((0xF0 & $in) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;
                } elseif ((0xF8 & $in) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;
                } elseif ((0xFC & $in) === 0xF8) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;
                } elseif ((0xFE & $in) === 0xFC) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;
                } else {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }
            } elseif ((0xC0 & $in) === 0x80) {
                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence

                // Legal continuation.
                $shift = ($mState - 1) * 6;
                $tmp = $in;
                $tmp = ($tmp & 0x0000003F) << $shift;
                $mUcs4 |= $tmp;
                // Prefix: End of the multi-octet sequence. mUcs4 now contains the final
                // Unicode code point to be output.
                if (--$mState === 0) {
                    // Check for illegal sequences and code points.
                    //
                    // From Unicode 3.1, non-shortest form is illegal
                    if (
                        ($mBytes === 2 && $mUcs4 < 0x0080)
                        ||
                        ($mBytes === 3 && $mUcs4 < 0x0800)
                        ||
                        ($mBytes === 4 && $mUcs4 < 0x10000)
                        ||
                        ($mBytes > 4)
                        ||
                        // From Unicode 3.2, surrogate characters are illegal.
                        (($mUcs4 & 0xFFFFF800) === 0xD800)
                        ||
                        // Code points outside the Unicode range are illegal.
                        ($mUcs4 > 0x10FFFF)
                    ) {
                        return false;
                    }
                    // initialize UTF8 cache
                    $mState = 0;
                    $mUcs4 = 0;
                    $mBytes = 1;
                }
            } else {
                // ((0xC0 & (*in) != 0x80) && (mState != 0))
                // Incomplete multi-octet sequence.
                return false;
            }
        }

        // A non-zero state means the input ended in the middle of a multi-octet
        // sequence (e.g. a lone "\xC3"): that is not valid UTF-8 either.
        return $mState === 0;
    }
12388
12389
    /**
     * Applies the case-folding replacement tables to a string.
     *
     * The common table is always applied; the full table (loaded once from
     * "caseFolding_full" and then kept in a static variable) is applied on top
     * if "$use_full_case_fold" is set.
     *
     * @param string $str
     * @param bool   $use_lowercase      <p>Use uppercase by default, otherwise use lowercase.</p>
     * @param bool   $use_full_case_fold <p>Convert not only common cases.</p>
     *
     * @return string
     */
    private static function fixStrCaseHelper(
        string $str,
        $use_lowercase = false,
        $use_full_case_fold = false
    ): string {
        // only a real boolean "true" switches to lowercase
        $to_lower = $use_lowercase === true;

        $common_upper = self::$COMMON_CASE_FOLD['upper'];
        $common_lower = self::$COMMON_CASE_FOLD['lower'];

        $str = $to_lower
            ? \str_replace($common_upper, $common_lower, $str)
            : \str_replace($common_lower, $common_upper, $str);

        if (!$use_full_case_fold) {
            return $str;
        }

        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        if ($to_lower) {
            return \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str);
        }

        return \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
    }
12433
12434
    /**
     * Includes a file from "/data/*.php" and returns what that file returns.
     *
     * @param string $file <p>The file name without the ".php" extension.</p>
     *
     * @return array
     */
    private static function getData(string $file): array
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
12448
12449
    /**
     * Builds the emoji lookup caches on first use.
     *
     * The emoji table is sorted by key length (longest key first), then the
     * key cache, the value cache and the list of reversible placeholder
     * strings are filled from it.
     *
     * @return true|null
     *                   <p>true if the caches were built by this call, null if they already existed.</p>
     */
    private static function initEmojiData()
    {
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        // longest keys first
        $by_length_desc = static function (string $a, string $b): int {
            return \strlen($b) <=> \strlen($a);
        };
        \uksort(self::$EMOJI, $by_length_desc);

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = \array_values(self::$EMOJI);

        foreach (self::$EMOJI_KEYS_CACHE as $emoji_key) {
            $checksum = \crc32($emoji_key);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev((string) $checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
12479
12480
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * @return bool
     *              <p>false if the "MB_OVERLOAD_STRING" constant does not exist or the
     *              matching bit of "mbstring.func_overload" is not set.</p>
     */
    private static function mbstring_overloaded(): bool
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */

        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $func_overload = (int) @\ini_get('mbstring.func_overload');

        return ($func_overload & \MB_OVERLOAD_STRING) !== 0;
    }
12497
12498
    /**
     * Filters a list of strings and returns the remaining values re-indexed from 0.
     *
     * @param array    $strings             <p>The strings to filter.</p>
     * @param bool     $remove_empty_values <p>Drop values that are empty after "trim()".</p>
     * @param int|null $remove_short_values <p>
     *                                      Drop values whose character length is lower than or equal
     *                                      to this number; null disables the length check.
     *                                      </p>
     *
     * @return array
     */
    private static function reduce_string_array(
        array $strings,
        bool $remove_empty_values,
        int $remove_short_values = null
    ): array {
        $kept = [];

        foreach ($strings as $candidate) {
            $is_too_short = $remove_short_values !== null
                            && \mb_strlen($candidate) <= $remove_short_values;
            if ($is_too_short) {
                continue;
            }

            $is_blank = $remove_empty_values === true
                        && \trim($candidate) === '';
            if ($is_blank) {
                continue;
            }

            $kept[] = $candidate;
        }

        return $kept;
    }
12535
12536
    /**
     * rxClass
     *
     * Builds a regex fragment that matches any single character of "$s"
     * (plus the optional, already regex-ready class content in "$class").
     * The result is cached per ("$class", "$s") pair.
     *
     * @param string $s     <p>The characters to match.</p>
     * @param string $class <p>Raw character-class content that is used as-is, e.g. "\s".</p>
     *
     * @return string
     *                <p>A bracketed character class, or a "(?:...|...)" group if
     *                alternatives had to be added.</p>
     */
    private static function rxClass(string $s, string $class = ''): string
    {
        static $RX_CLASS_CACHE = [];

        // Two-level cache: a plain "$s . $class" key cannot tell ("\s", "") and
        // ("", "\s") apart, although both produce different patterns.
        if (isset($RX_CLASS_CACHE[$class][$s])) {
            return $RX_CLASS_CACHE[$class][$s];
        }

        $class_array = [$class];

        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a hyphen goes to the front so that it never forms a range
                $class_array[0] = '-' . $class_array[0];
            } elseif (!isset($char[2])) {
                // at most two bytes long
                $class_array[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                $class_array[0] .= $char;
            } else {
                $class_array[] = $char;
            }
        }

        // compare against '' instead of testing truthiness, so that "0" is bracketed as well
        if ($class_array[0] !== '') {
            $class_array[0] = '[' . $class_array[0] . ']';
        }

        if (\count($class_array) === 1) {
            $return = $class_array[0];
        } else {
            $return = '(?:' . \implode('|', $class_array) . ')';
        }

        $RX_CLASS_CACHE[$class][$s] = $return;

        return $return;
    }
12584
12585
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * The string is split by "$delimiter" and every part gets an uppercase
     * first character, unless the part is one of the listed lowercase
     * particles or starts with one of the listed prefixes.
     *
     * @param string $names     <p>The name(s) to capitalize.</p>
     * @param string $delimiter <p>The separator between the parts of the name.</p>
     * @param string $encoding  <p>The encoding used for the prefix checks.</p>
     *
     * @return string
     */
    private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
    {
        $parts = \explode($delimiter, $names);
        if ($parts === false) {
            return '';
        }

        // parts that stay exactly as they are
        $lowercase_names = [
            'ab',
            'af',
            'al',
            'and',
            'ap',
            'bint',
            'binte',
            'da',
            'de',
            'del',
            'den',
            'der',
            'di',
            'dit',
            'ibn',
            'la',
            'mac',
            'nic',
            'of',
            'ter',
            'the',
            'und',
            'van',
            'von',
            'y',
            'zu',
        ];

        // parts starting with one of these stay as they are, too
        $prefixes = [
            'al-',
            "d'",
            'ff',
            "l'",
            'mac',
            'mc',
            'nic',
        ];

        foreach ($parts as $index => $name) {
            if (\in_array($name, $lowercase_names, true)) {
                continue;
            }

            $keep_as_is = false;

            // for hyphenated names the particles count as prefixes as well
            if ($delimiter === '-') {
                foreach ($lowercase_names as $beginning) {
                    if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                        $keep_as_is = true;
                    }
                }
            }

            foreach ($prefixes as $beginning) {
                if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                    $keep_as_is = true;
                }
            }

            if ($keep_as_is === true) {
                continue;
            }

            $parts[$index] = self::ucfirst($name);
        }

        return \implode($delimiter, $parts);
    }
12674
12675
    /**
     * Generic case-sensitive transformation for collation matching.
     *
     * The string is normalized to NFD and all non-spacing marks ("\p{Mn}")
     * are removed afterwards.
     *
     * @param string $str <p>The input string</p>
     *
     * @return string|null
     */
    private static function strtonatfold(string $str)
    {
        /** @noinspection PhpUndefinedClassInspection */
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        return \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
12691
12692
    /**
     * Converts a single character into its UTF-8 representation.
     *
     * The character is looked up in the ORD table; if the resulting number has
     * an entry in the Windows-1252 table, that entry is returned. Otherwise a
     * two-octet sequence is assembled with byte-wise string operators.
     *
     * @param int|string $input <p>The character to convert (used as key of the ORD table).</p>
     *
     * @return string
     */
    private static function to_utf8_convert_helper($input): string
    {
        // init
        $buf = '';

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // Truncate explicitly: a fractional float used as array key is
            // deprecated (implicit float to int conversion, PHP 8.1).
            $high_bits = (int) ($ordC1 / 64);

            // The operands are strings, so "|" and "&" work on the raw bytes here.
            /** @noinspection OffsetOperationsInspection */
            $cc1 = self::$CHR[$high_bits] | "\xC0";
            $cc2 = ((string) $input & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
12726
12727
    /**
     * Rewrites "%uXXXX" escapes (3 or 4 hex digits) into "&#xXXXX;" HTML entities.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *                <p>The input itself if it contains no such escape.</p>
     */
    private static function urldecode_unicode_helper(string $str): string
    {
        $pattern = '/%u([0-9a-fA-F]{3,4})/';

        if (!\preg_match($pattern, $str)) {
            return $str;
        }

        return (string) \preg_replace($pattern, '&#x\\1;', $str);
    }
12741
}
12742