Passed
Push — master ( 4b4b13...ba519a )
by Lars
07:49
created

UTF8   F

Complexity

Total Complexity 1697

Size/Duplication

Total Lines 12869
Duplicated Lines 0 %

Test Coverage

Coverage 79.5%

Importance

Changes 98
Bugs 52 Features 6
Metric Value
eloc 4353
c 98
b 52
f 6
dl 0
loc 12869
ccs 3052
cts 3839
cp 0.795
rs 0.8
wmc 1697

298 Methods

Rating   Name   Duplication   Size   Complexity  
A ctype_loaded() 0 3 1
A decimal_to_chr() 0 3 1
A hasBom() 0 3 1
A str_substr_after_first_separator() 0 28 6
A str_starts_with() 0 11 3
A str_starts_with_any() 0 17 5
A isBinary() 0 3 1
A html_escape() 0 6 1
A isHtml() 0 3 1
A isBase64() 0 3 1
A isUtf32() 0 3 1
A is_alpha() 0 8 2
A isUtf8() 0 3 1
A htmlspecialchars() 0 15 3
A intlChar_loaded() 0 3 1
A intl_loaded() 0 3 1
A html_stripe_empty_tags() 0 6 1
A isBom() 0 3 1
A int_to_chr() 0 3 1
A iconv_loaded() 0 3 1
A isAscii() 0 3 1
A isUtf16() 0 3 1
A is_alphanumeric() 0 8 2
A int_to_hex() 0 7 2
A htmlentities() 0 28 3
A isJson() 0 3 1
B is_json() 0 29 8
A add_bom_to_string() 0 7 2
A array_change_key_case() 0 23 5
A chr_to_int() 0 3 1
A __construct() 0 2 1
B between() 0 48 8
A char_at() 0 7 2
A chars() 0 3 1
A access() 0 11 4
A callback() 0 3 1
A binary_to_str() 0 12 3
A bom() 0 3 1
B chr_to_decimal() 0 38 8
D chr() 0 101 18
A chunk_split() 0 3 1
A chr_map() 0 5 1
A chr_size_list() 0 17 3
A checkForSupport() 0 47 4
A chr_to_hex() 0 11 3
A file_has_bom() 0 8 2
A str_begins() 0 3 1
A max() 0 14 3
B str_camelize() 0 70 10
A parse_str() 0 16 4
A filter_input() 0 13 3
A str_contains() 0 10 2
A get_unique_string() 0 15 2
A is_bom() 0 10 3
A is_hexadecimal() 0 8 2
A encode_mimeheader() 0 25 5
A count_chars() 0 11 1
A str_isubstr_last() 0 25 4
A str_replace_beginning() 0 24 6
A has_uppercase() 0 8 2
A remove_left() 0 24 4
A str_offset_exists() 0 10 2
A str_iends_with() 0 11 3
A max_chr_width() 0 8 2
A ltrim() 0 27 5
A emoji_decode() 0 18 2
A is_utf8() 0 13 4
A remove_html() 0 3 1
B str_longest_common_suffix() 0 54 10
A lcword() 0 13 1
A str_pad_both() 0 12 1
A str_index_last() 0 11 1
A mbstring_loaded() 0 3 1
A str_limit() 0 26 6
D normalize_encoding() 0 142 16
B get_file_type() 0 61 7
A str_ensure_right() 0 13 4
C is_utf16() 0 68 16
C filter() 0 59 13
A normalize_whitespace() 0 9 1
A str_humanize() 0 15 1
A is_html() 0 14 2
A decode_mimeheader() 0 15 5
A html_decode() 0 6 1
A str_index_first() 0 11 1
A str_ireplace_ending() 0 21 6
A rtrim() 0 27 5
C str_longest_common_substring() 0 76 16
A regex_replace() 0 20 3
A str_iindex_first() 0 11 1
A str_isubstr_before_first_separator() 0 19 5
A replace_all() 0 11 2
A removeBOM() 0 3 1
A emoji_encode() 0 18 2
A str_matches_pattern() 0 3 1
B get_random_string() 0 56 10
A str_replace_first() 0 20 2
A fix_utf8() 0 30 4
A str_pad_right() 0 12 1
A first_char() 0 14 4
A str_iends() 0 3 1
A css_stripe_media_queries() 0 6 1
A clean() 0 48 6
A is_serialized() 0 11 3
A is_uppercase() 0 8 2
D str_pad() 0 146 16
A str_ireplace() 0 18 3
A str_replace_ending() 0 24 6
A str_contains_all() 0 23 6
A is_ascii() 0 3 1
A normalize_line_ending() 0 3 1
A str_isubstr_after_last_separator() 0 26 5
D range() 0 71 23
B rawurldecode() 0 51 8
A str_ends() 0 3 1
A normalize_msword() 0 3 1
C str_detect_encoding() 0 111 13
A spaces_to_tabs() 0 11 3
A str_istarts_with() 0 11 3
A is_blank() 0 8 2
A str_replace() 0 14 1
D getCharDirection() 0 105 118
A replace() 0 11 2
A filter_var_array() 0 12 2
A pcre_utf8_support() 0 4 1
A str_isubstr_before_last_separator() 0 24 6
A codepoints() 0 36 5
A lowerCaseFirst() 0 13 1
A str_ends_with_any() 0 13 4
A cleanup() 0 25 2
A remove_right() 0 25 4
A remove_html_breaks() 0 3 1
A showSupport() 0 8 2
A remove_invisible_characters() 0 9 1
A single_chr_html_encode() 0 18 4
A str_replace_last() 0 19 2
A str_iindex_last() 0 11 1
B is_binary() 0 35 9
A lcfirst() 0 44 5
A finfo_loaded() 0 3 1
A str_ends_with() 0 11 3
A fits_inside() 0 3 1
A is_binary_file() 0 16 3
B str_longest_common_prefix() 0 51 8
A str_pad_left() 0 12 1
A remove_bom() 0 22 5
A str_repeat() 0 5 1
A str_iends_with_any() 0 13 4
A str_isubstr_after_first_separator() 0 26 5
F extract_text() 0 175 34
A json_loaded() 0 3 1
B str_snakeize() 0 55 6
A is_lowercase() 0 8 2
A str_sort() 0 15 3
A str_offset_get() 0 14 4
A str_ibegins() 0 3 1
A str_capitalize_name() 0 8 1
B str_limit_after_word() 0 55 11
A lcwords() 0 34 6
A normalizeEncoding() 0 3 1
A filter_var() 0 12 2
A is_empty() 0 3 1
B html_encode() 0 53 11
A str_dasherize() 0 3 1
A str_ensure_left() 0 11 3
F encode() 0 140 37
C is_utf32() 0 68 16
C ord() 0 72 16
A json_decode() 0 14 2
A fix_simple_utf8() 0 19 4
A has_lowercase() 0 8 2
A json_encode() 0 10 2
A str_isubstr_first() 0 25 4
A is_base64() 0 20 5
A str_last_char() 0 16 4
A str_ireplace_beginning() 0 21 6
A hex_to_int() 0 14 3
A hex_to_chr() 0 3 1
A filter_input_array() 0 12 3
A str_insert() 0 28 4
A getSupportInfo() 0 13 3
A replace_diamond_question_mark() 0 38 5
B str_delimit() 0 33 8
A min() 0 14 3
A collapse_whitespace() 0 8 2
C html_entity_decode() 0 55 13
A split() 0 6 1
A str_istarts_with_any() 0 17 5
B str_contains_any() 0 29 8
A remove_duplicates() 0 16 4
B str_slice() 0 33 10
A str_shuffle() 0 35 6
B file_get_contents() 0 56 11
B str_to_lines() 0 29 8
A substr_in_byte() 0 18 6
A strnatcasecmp() 0 5 1
A substr_left() 0 15 4
D strlen() 0 99 19
B stripos() 0 59 11
D strrchr() 0 101 20
A to_filename() 0 9 1
C utf8_decode() 0 61 13
C wordwrap() 0 68 14
B ucfirst() 0 57 7
A str_substr_last() 0 33 6
A toUTF8() 0 3 1
A string() 0 12 3
B rxClass() 0 39 8
B str_titleize_for_humans() 0 160 7
C substr_count_in_byte() 0 55 15
A strchr() 0 13 1
A strichr() 0 13 1
A strlen_in_byte() 0 12 3
A titlecase() 0 31 5
A getData() 0 6 1
B strtolower() 0 54 10
B urldecode() 0 51 8
B strrev() 0 43 10
D substr_replace() 0 124 27
A strstr_in_byte() 0 15 4
C str_titleize() 0 69 12
A ws() 0 3 1
A toLatin1() 0 3 1
B ucwords() 0 51 9
A to_boolean() 0 35 5
C stristr() 0 68 15
A strncasecmp() 0 10 1
B strwidth() 0 43 8
A trim() 0 27 5
A str_upper_camelize() 0 8 1
A substr_compare() 0 33 6
C substr_count() 0 62 16
A strnatcmp() 0 9 2
A urldecode_unicode_helper() 0 8 2
A to_latin1() 0 3 1
A string_has_bom() 0 10 3
B strtr() 0 34 8
B strspn() 0 30 10
A strcasecmp() 0 21 1
A str_transliterate() 0 6 1
B str_capitalize_name_helper() 0 82 10
A utf8_encode() 0 16 3
A substr_iright() 0 15 4
A to_iso8859() 0 16 4
A words_limit() 0 20 5
A strip_tags() 0 18 4
D str_truncate_safe() 0 78 18
A substr_right() 0 31 6
D str_split() 0 132 30
A strrpos_in_byte() 0 12 4
F strrpos() 0 119 25
A str_substr_before_last_separator() 0 31 6
B strtocasefold() 0 33 7
A tabs_to_spaces() 0 11 3
B str_truncate() 0 44 7
D strripos() 0 96 19
A strpos_in_byte() 0 12 4
A to_ascii() 0 6 1
A reduce_string_array() 0 29 6
A mbstring_overloaded() 0 11 2
A str_substr_first() 0 33 6
A strpbrk() 0 11 4
A whitespace_table() 0 3 1
A substr_count_simple() 0 31 6
A str_substr_after_last_separator() 0 28 6
D to_utf8() 0 117 35
A ucword() 0 6 1
A str_underscored() 0 3 1
A strip_whitespace() 0 7 2
A toAscii() 0 6 1
A str_upper_first() 0 13 1
A swapCase() 0 17 4
A substr_ileft() 0 15 4
B urldecode_fix_win1252_chars() 0 227 1
A toIso8859() 0 3 1
A strtonatfold() 0 7 1
C strcspn() 0 52 12
A fixStrCaseHelper() 0 36 5
B str_split_pattern() 0 49 11
D strstr() 0 92 18
A str_substr_before_first_separator() 0 32 6
F substr() 0 143 32
A wordwrap_per_line() 0 28 5
A str_surround() 0 3 1
A strncmp() 0 19 4
A utf8_fix_win1252_chars() 0 3 1
D is_utf8_string() 0 134 28
A to_utf8_convert_helper() 0 28 5
B strtoupper() 0 54 10
B strrichr() 0 54 11
A initEmojiData() 0 26 4
F strpos() 0 131 27
A strcmp() 0 9 2
A str_word_count() 0 23 5
A strripos_in_byte() 0 12 4
A str_to_binary() 0 10 2
A symfony_polyfill_used() 0 16 5
B str_to_words() 0 36 8

How to fix   Complexity   

Complex Class

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
final class UTF8
8
{
9
    /**
10
     * (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
11
     * This regular expression is a workaround for http://bugs.exim.org/1279
12
     */
13
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
14
15
    /**
     * Bom => Byte-Length
     *
     * Every byte order mark is listed twice: once as its raw byte sequence and
     * once in the form labelled "WINDOWS-1252" below, i.e. the text you get
     * when the raw BOM bytes are read as WINDOWS-1252 characters (which is why
     * those variants are longer than the raw BOM).
     *
     * The UTF-8 variant is written with explicit byte escapes instead of
     * literal characters, so the key cannot be lost or altered when the source
     * file is re-encoded or copied.
     *
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
     *
     * @var array<string, int>
     */
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char may take more than one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        // NOTE(review): the two leading characters of the next key mirror the two NUL bytes
        // of the raw BOM above - confirm whether they are meant to be NUL bytes or spaces.
        '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        // NOTE(review): same question for the two trailing characters of the next key.
        'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
34
35
    /**
     * Numeric code point => UTF-8 Character
     *
     * Keys are written in hexadecimal so they read like "U+XXXX" code points;
     * the values are the UTF-8 byte sequences of the named characters.
     *
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
     *
     * @var array<int, string>
     */
    private static $WHITESPACE = [
        0x0000 => "\x0",          // NUL Byte
        0x0009 => "\x9",          // Tab
        0x000A => "\xa",          // New Line
        0x000B => "\xb",          // Vertical Tab
        0x000D => "\xd",          // Carriage Return
        0x0020 => "\x20",         // Ordinary Space
        0x00A0 => "\xc2\xa0",     // NO-BREAK SPACE
        0x1680 => "\xe1\x9a\x80", // OGHAM SPACE MARK
        0x180E => "\xe1\xa0\x8e", // MONGOLIAN VOWEL SEPARATOR
        0x2000 => "\xe2\x80\x80", // EN QUAD
        0x2001 => "\xe2\x80\x81", // EM QUAD
        0x2002 => "\xe2\x80\x82", // EN SPACE
        0x2003 => "\xe2\x80\x83", // EM SPACE
        0x2004 => "\xe2\x80\x84", // THREE-PER-EM SPACE
        0x2005 => "\xe2\x80\x85", // FOUR-PER-EM SPACE
        0x2006 => "\xe2\x80\x86", // SIX-PER-EM SPACE
        0x2007 => "\xe2\x80\x87", // FIGURE SPACE
        0x2008 => "\xe2\x80\x88", // PUNCTUATION SPACE
        0x2009 => "\xe2\x80\x89", // THIN SPACE
        0x200A => "\xe2\x80\x8a", // HAIR SPACE
        0x2028 => "\xe2\x80\xa8", // LINE SEPARATOR
        0x2029 => "\xe2\x80\xa9", // PARAGRAPH SEPARATOR
        0x202F => "\xe2\x80\xaf", // NARROW NO-BREAK SPACE
        0x205F => "\xe2\x81\x9f", // MEDIUM MATHEMATICAL SPACE
        0xFFA0 => "\xef\xbe\xa0", // HALFWIDTH HANGUL FILLER
        0x3000 => "\xe3\x80\x80", // IDEOGRAPHIC SPACE
    ];
96
97
    /**
     * Unicode character name => UTF-8 byte sequence of that whitespace character.
     *
     * The trailing comments give the code point each byte sequence decodes to.
     *
     * @var array<string, string>
     */
    private static $WHITESPACE_TABLE = [
        'SPACE'                     => "\x20",         // U+0020
        'NO-BREAK SPACE'            => "\xc2\xa0",     // U+00A0
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80", // U+1680
        'EN QUAD'                   => "\xe2\x80\x80", // U+2000
        'EM QUAD'                   => "\xe2\x80\x81", // U+2001
        'EN SPACE'                  => "\xe2\x80\x82", // U+2002
        'EM SPACE'                  => "\xe2\x80\x83", // U+2003
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84", // U+2004
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85", // U+2005
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86", // U+2006
        'FIGURE SPACE'              => "\xe2\x80\x87", // U+2007
        'PUNCTUATION SPACE'         => "\xe2\x80\x88", // U+2008
        'THIN SPACE'                => "\xe2\x80\x89", // U+2009
        'HAIR SPACE'                => "\xe2\x80\x8a", // U+200A
        'LINE SEPARATOR'            => "\xe2\x80\xa8", // U+2028
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9", // U+2029
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b", // U+200B
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf", // U+202F
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f", // U+205F
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80", // U+3000
        'HALFWIDTH HANGUL FILLER'   => "\xef\xbe\xa0", // U+FFA0
    ];
123
124
    /**
     * Two parallel lists of characters: the entry at index N of 'upper'
     * corresponds to the entry at index N of 'lower' (both lists have 14 entries).
     *
     * NOTE(review): presumably used as search/replace lists for case folding -
     * the consumer is not visible in this part of the file, please confirm.
     *
     * @var array{upper: string[], lower: string[]}
     */
    private static $COMMON_CASE_FOLD = [
        'upper' => [
            'µ',
            'ſ',
            "\xCD\x85",     // U+0345
            'ς',
            'ẞ',
            "\xCF\x90",     // U+03D0
            "\xCF\x91",     // U+03D1
            "\xCF\x95",     // U+03D5
            "\xCF\x96",     // U+03D6
            "\xCF\xB0",     // U+03F0
            "\xCF\xB1",     // U+03F1
            "\xCF\xB5",     // U+03F5
            "\xE1\xBA\x9B", // U+1E9B
            "\xE1\xBE\xBE", // U+1FBE
        ],
        'lower' => [
            'μ',
            's',
            'ι',
            'σ',
            'ß',
            'β',
            'θ',
            'φ',
            'π',
            'κ',
            'ρ',
            'ε',
            "\xE1\xB9\xA1", // U+1E61
            'ι',
        ],
    ];
161
162
    /**
     * Environment capability flags, keyed by feature name
     * (e.g. 'mbstring', 'iconv', 'intl'); filled in by checkForSupport().
     *
     * @var array
     */
    private static $SUPPORT = [];

    /** @var array|null starts out unset (null) */
    private static $BROKEN_UTF8_FIX;

    /** @var array|null starts out unset (null) */
    private static $WIN1252_TO_UTF8;

    /** @var array|null starts out unset (null) */
    private static $INTL_TRANSLITERATOR_LIST;

    /** @var array|null starts out unset (null) */
    private static $ENCODINGS;

    /** @var array|null starts out unset (null) */
    private static $ORD;

    /** @var array|null starts out unset (null) */
    private static $EMOJI;

    /** @var array|null starts out unset (null) */
    private static $EMOJI_VALUES_CACHE;

    /** @var array|null starts out unset (null) */
    private static $EMOJI_KEYS_CACHE;

    /** @var array|null starts out unset (null) */
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;

    /**
     * Lookup table used by chr(); loaded on first use via getData('chr').
     *
     * @var array|null
     */
    private static $CHR;
216
217
    /**
     * Creates an instance; there is nothing to initialise.
     *
     * The constructor takes no arguments and performs no work.
     */
    public function __construct()
    {
        // intentionally empty
    }
223
224
    /**
     * Return the character at the specified position: $str[1] like functionality.
     *
     * Unlike UTF8::char_at(), a negative position is rejected and yields an
     * empty string.
     *
     * @param string $str      <p>A UTF-8 string.</p>
     * @param int    $pos      <p>The zero-based position of the character to return.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>A single multi-byte character, or an empty string if the input is empty,
     *                the position is negative, or nothing is found at that position.</p>
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $pos < 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // Name the encoding explicitly: without it "mb_substr" falls back to the
            // process-wide "mb_internal_encoding", which other code may have changed.
            return (string) \mb_substr($str, $pos, 1, 'UTF-8');
        }

        return (string) self::substr($str, $pos, 1, $encoding);
    }
245
246
    /**
     * Makes sure the string starts with a UTF-8 byte order mark.
     *
     * INFO: A string for which UTF8::string_has_bom() does not report false
     * is handed back unchanged.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the input, prefixed with UTF8::bom() when no BOM was detected
     */
    public static function add_bom_to_string(string $str): string
    {
        return self::string_has_bom($str) === false
            ? self::bom() . $str
            : $str;
    }
263
264
    /**
     * Changes the case of all keys in an array.
     *
     * Values are copied over untouched. Keys that only differ in case collapse
     * into a single entry; the value seen last wins.
     *
     * @param array  $array    <p>The array to work on</p>
     * @param int    $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                         or <strong>CASE_LOWER</strong> (default); any other value
     *                         is treated as <strong>CASE_LOWER</strong>.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return array
     *               <p>An array with its keys lower- or uppercased.</p>
     */
    public static function array_change_key_case(
        array $array,
        int $case = \CASE_LOWER,
        string $encoding = 'UTF-8'
    ): array {
        if (
            $case !== \CASE_LOWER
            &&
            $case !== \CASE_UPPER
        ) {
            $case = \CASE_LOWER;
        }

        $return = [];
        // Iterate by value: nothing is written back into $array, so a by-reference
        // loop would only turn every element into a reference for no benefit.
        foreach ($array as $key => $value) {
            $key = $case === \CASE_LOWER
                ? self::strtolower((string) $key, $encoding)
                : self::strtoupper((string) $key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
299
300
    /**
     * Returns the substring between $start and $end, if found, or an empty
     * string. An optional offset may be supplied from which to begin the
     * search for the start string.
     *
     * The search for $end begins directly after the first occurrence of $start.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index (in characters) from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The text between the delimiters, or an empty string if either
     *                delimiter is missing or nothing lies between them.</p>
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding === 'UTF-8') {
            // Fast path. The encoding is named explicitly in every "mb_" call so that
            // $offset and all positions are counted in UTF-8 characters, independent of
            // the process-wide "mb_internal_encoding".
            $start_position = \mb_strpos($str, $start, $offset, 'UTF-8');
            if ($start_position === false) {
                return '';
            }

            $substr_index = $start_position + (int) \mb_strlen($start, 'UTF-8');
            $end_position = \mb_strpos($str, $end, $substr_index, 'UTF-8');
            if (
                $end_position === false
                ||
                $end_position === $substr_index
            ) {
                return '';
            }

            return (string) \mb_substr($str, $substr_index, $end_position - $substr_index, 'UTF-8');
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $start_position = self::strpos($str, $start, $offset, $encoding);
        if ($start_position === false) {
            return '';
        }

        $substr_index = $start_position + (int) self::strlen($start, $encoding);
        $end_position = self::strpos($str, $end, $substr_index, $encoding);
        if (
            $end_position === false
            ||
            $end_position === $substr_index
        ) {
            return '';
        }

        return (string) self::substr(
            $str,
            $substr_index,
            $end_position - $substr_index,
            $encoding
        );
    }
363
364
    /**
     * Convert a string of binary digits into the bytes it encodes.
     *
     * The digits are read as one binary number, rendered as hexadecimal
     * without leading zeros, and packed via pack('H*'). The conversion is done
     * four bits at a time, so it stays exact for inputs of any length
     * ("base_convert" goes through a float and loses precision on large numbers).
     *
     * Characters other than "0" and "1" are ignored.
     *
     * @param mixed $bin <p>A string consisting of the digits 1|0.</p>
     *
     * @return string
     *                <p>The packed bytes, or an empty string if the input is not a string,
     *                is empty, or contains no "1" digit.</p>
     */
    public static function binary_to_str($bin): string
    {
        if (!\is_string($bin) || $bin === '') {
            return '';
        }

        // keep the binary digits only and drop leading zeros, they carry no value
        $bits = \ltrim((string) \preg_replace('/[^01]/', '', $bin), '0');
        if ($bits === '') {
            return '';
        }

        // left-pad to whole nibbles, so that every group of four bits is one hex digit
        $missing = (4 - \strlen($bits) % 4) % 4;
        $bits = \str_repeat('0', $missing) . $bits;

        $hex = '';
        foreach (\str_split($bits, 4) as $nibble) {
            $hex .= \dechex((int) \bindec($nibble));
        }

        return \pack('H*', $hex);
    }
384
385
    /**
     * Returns the UTF-8 Byte Order Mark Character.
     *
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
     *
     * @return string UTF-8 Byte Order Mark (the three bytes EF BB BF)
     */
    public static function bom(): string
    {
        // U+FEFF encodes to the bytes EF BB BF in UTF-8
        return "\u{FEFF}";
    }
396
397
    /**
     * @alias of UTF8::chr_map()
     *
     * Forwards both arguments unchanged and returns whatever UTF8::chr_map() returns.
     *
     * @param callable $callback
     * @param string   $str
     *
     * @return string[]
     *
     * @see UTF8::chr_map()
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
411
412
    /**
     * Returns the character at $index, with indexes starting at 0.
     *
     * The index is handed to the substring function as-is, so a negative index
     * is not rejected here (in contrast to UTF8::access()).
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @return string the character at $index
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        if ($encoding === 'UTF-8') {
            // Name the encoding explicitly: without it "mb_substr" falls back to the
            // process-wide "mb_internal_encoding", which other code may have changed.
            return (string) \mb_substr($str, $index, 1, 'UTF-8');
        }

        return (string) self::substr($str, $index, 1, $encoding);
    }
429
430
    /**
     * Splits the string into its characters.
     *
     * Thin wrapper: the work is done by UTF8::str_split() with its default arguments.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string[] an array of chars
     */
    public static function chars(string $str): array
    {
        $characters = self::str_split($str);

        return $characters;
    }
441
442
    /**
     * Detects once which UTF-8 related capabilities the server environment
     * offers and records the findings in self::$SUPPORT.
     *
     * When mbstring is loaded (or the Symfony polyfill is in use) the internal
     * "mb_" encoding is switched to UTF-8 as a side effect.
     *
     * @return true|null
     *                   <p><strong>true</strong> if the detection ran during this call,
     *                   <strong>null</strong> if it had already been done before.</p>
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        // guard: the detection is performed only once
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        // the polyfill provides "mb_" functions too, so it gets the same UTF-8 default
        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
497
498
    /**
     * Generates a character from the given code point, UTF-8 encoded unless
     * another target encoding is requested.
     *
     * INFO: opposite to UTF8::ord()
     *
     * Results are memoised in a function-static cache, keyed by the code point
     * concatenated with the (normalised) encoding.
     *
     * Lookup order: table lookup for code points up to 127, then "IntlChar"
     * if available, and finally a manual UTF-8 byte computation.
     *
     * @param int|string $code_point <p>The code point for which to generate a character.</p>
     * @param string     $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @return string|null multi-byte character, returns null on failure or empty input
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // memoised results of earlier calls
        static $CHAR_CACHE = [];

        // "UTF-8" and "CP850" are taken verbatim, every other name is normalised first
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (
            !\in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        $cache_key = $code_point . $encoding;
        if (isset($CHAR_CACHE[$cache_key]) === true) {
            return $CHAR_CACHE[$cache_key];
        }

        // every path below builds the UTF-8 form first and converts afterwards if required
        $needs_conversion = $encoding !== 'UTF-8';

        // use "simple"-char only until "\x80"
        if ($code_point <= 127) {
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];

            if ($needs_conversion) {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);

            if ($needs_conversion) {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via vanilla php
        //

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $code_point = (int) $code_point;

        if ($code_point <= 0x7F) {
            // one byte
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];
        } elseif ($code_point <= 0x7FF) {
            // two bytes: lead byte + one continuation byte
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } elseif ($code_point <= 0xFFFF) {
            // three bytes: lead byte + two continuation bytes
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } else {
            // four bytes: lead byte + three continuation bytes
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                   self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        }

        if ($needs_conversion) {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cache_key] = $chr;
    }
610
611
    /**
     * Runs a callback on every character of a string.
     *
     * The string is split with UTF8::str_split() and the pieces are passed
     * through "array_map".
     *
     * @param callable $callback <p>The callback function.</p>
     * @param string   $str      <p>UTF-8 string to run callback on.</p>
     *
     * @return string[]
     *                  <p>The outcome of the callback, as array.</p>
     */
    public static function chr_map($callback, string $str): array
    {
        $characters = self::str_split($str);

        return \array_map($callback, $characters);
    }
627
628
    /**
     * Lists the byte length of every character of a Unicode string.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @return int[] an array of byte lengths of each character, empty for an empty string
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        // Pick the byte counter: with "mbstring.func_overload" active the byte count
        // is taken via "mb_strlen" in an 8-bit encoding, otherwise via "\strlen".
        $byte_counter = self::$SUPPORT['mbstring_func_overload'] === true
            ? static function (string $data): int {
                // "mb_" is available if overload is used, so use it ...
                return \mb_strlen($data, 'CP850'); // 8-BIT
            }
            : '\strlen';

        return \array_map($byte_counter, self::str_split($str));
    }
658
659
    /**
     * Get a decimal code representation of a specific character.
     *
     * With iconv available the character is converted to UCS-4LE and read as a
     * 32-bit little-endian integer. Otherwise (or if iconv fails) the code
     * point is decoded by hand from the lead byte and its continuation bytes.
     *
     * @param string $char <p>The input character.</p>
     *
     * @return int the code point, or 0 for an empty string
     */
    public static function chr_to_decimal(string $char): int
    {
        // An empty string has no character to decode: "unpack" would fail on it and
        // the manual fallback would read a non-existing offset.
        if ($char === '') {
            return 0;
        }

        if (self::$SUPPORT['iconv'] === true) {
            $chr_tmp = \iconv('UTF-8', 'UCS-4LE', $char);
            if ($chr_tmp !== false) {
                /** @noinspection OffsetOperationsInspection */
                return \unpack('V', $chr_tmp)[1];
            }
        }

        $code = self::ord($char[0]);
        $bytes = 1;

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        // the lead byte tells how long the sequence is; strip its marker bits
        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        // append the six payload bits of every continuation byte
        for ($i = 2; $i <= $bytes; ++$i) {
            // 10xxxxxx
            $code = ($code << 6) + (self::ord($char[$i - 1]) & ~0x80);
        }

        return $code;
    }
705
706
    /**
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * An empty string yields an empty string. The literal "&#0;" is replaced by
     * an empty string before its code is looked up.
     *
     * @param int|string $char   <p>The input character</p>
     * @param string     $prefix [optional]
     *
     * @return string The code point encoded as U+xxxx
     */
    public static function chr_to_hex($char, string $prefix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        $normalized = $char === '&#0;' ? '' : $char;
        $code_point = self::ord((string) $normalized);

        return self::int_to_hex($code_point, $prefix);
    }
726
727
    /**
     * Alias of "UTF8::chr_to_decimal()".
     *
     * @param string $chr <p>The input character.</p>
     *
     * @return int <p>Whatever "UTF8::chr_to_decimal()" returns for $chr.</p>
     *
     * @see UTF8::chr_to_decimal()
     * @deprecated <p>please use "UTF8::chr_to_decimal()"</p>
     */
    public static function chr_to_int(string $chr): int
    {
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
741
742
    /**
     * Splits a string into smaller chunks and joins them with the given line ending.
     *
     * The chunks come from UTF8::str_split() and are glued together with $end, so
     * $end appears between chunks only, not after the last one.
     *
     * @param string $body         <p>The original string to be split.</p>
     * @param int    $chunk_length [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end          [optional] <p>The character(s) to be inserted between the chunks.</p>
     *
     * @return string the chunked string
     */
    public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunk_length);

        return \implode($end, $chunks);
    }
755
756
    /**
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
     *
     * The optional steps run in a fixed order: diamond question mark, invisible
     * characters, whitespace, MS Word characters, BOM.
     *
     * @param string $str                                     <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
     * @param bool   $normalize_whitespace                    [optional] <p>Set to true, if you need to normalize the
     *                                                        whitespace.</p>
     * @param bool   $normalize_msword                        [optional] <p>Set to true, if you need to normalize MS Word chars
     *                                                        e.g.: "…"
     *                                                        => "..."</p>
     * @param bool   $keep_non_breaking_space                 [optional] <p>Set to true, to keep non-breaking-spaces, in
     *                                                        combination with
     *                                                        $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark           [optional] <p>Set to true, if you need to remove diamond question
     *                                                        mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters             [optional] <p>Set to false, if you do not want to remove invisible
     *                                                        characters e.g.: "\0"</p>
     * @param bool   $remove_invisible_characters_url_encoded [optional] <p>Set to true, if you also want to remove invisible
     *                                                        url encoded characters e.g.: "%0B"<br>
     *                                                        WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
     *                                                        </p>
     *
     * @return string clean UTF-8 encoded string
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true,
        bool $remove_invisible_characters_url_encoded = false
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // Group 1 captures runs of well-formed sequences; groups 2 and 3 capture
        // stray bytes. Replacing every match with "$1" keeps only the former.
        $utf8_filter = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        /** @noinspection NotOptimalRegularExpressionsInspection */
        $str = (string) \preg_replace($utf8_filter, '$1', $str);

        if ($replace_diamond_question_mark) {
            $str = self::replace_diamond_question_mark($str, '');
        }

        if ($remove_invisible_characters) {
            $str = self::remove_invisible_characters($str, $remove_invisible_characters_url_encoded);
        }

        if ($normalize_whitespace) {
            $str = self::normalize_whitespace($str, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $str = self::normalize_msword($str);
        }

        if ($remove_bom) {
            $str = self::remove_bom($str);
        }

        return $str;
    }
829
830
    /**
     * Clean-up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
     *
     * The input is cast to string, passed through UTF8::fix_simple_utf8() and then
     * through UTF8::clean() with a fixed set of options.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string <p>The cleaned string; an empty string for empty input.</p>
     */
    public static function cleanup($str): string
    {
        $input = (string) $str;

        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $repaired = self::fix_simple_utf8($input);

        return self::clean(
            $repaired,
            true,  // remove BOM
            true,  // normalize whitespace chars ...
            false, // do not normalize MS Word chars
            true,  // ... but keep non-breaking-spaces
            true,  // remove diamond question mark (�)
            true   // remove invisible characters (e.g. "\0")
        );
    }
864
865
    /**
     * Accepts a string or a array of strings and returns an array of Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * A string is first split via UTF8::str_split(); every element is then mapped
     * through UTF8::ord() and, for $u_style, through UTF8::int_to_hex().
     *
     * @param string|string[] $arg     <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $u_style <p>If True, will return code points in U+xxxx format,
     *                                 default, code points will be returned as integers.</p>
     *
     * @return array<int|string>
     *                           The array of code points:<br>
     *                           array<int> for $u_style === false<br>
     *                           array<string> for $u_style === true<br>
     */
    public static function codepoints($arg, bool $u_style = false): array
    {
        $chars = \is_string($arg) === true ? self::str_split($arg) : $arg;

        /**
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_array($chars) || $chars === []) {
            return [];
        }

        $code_points = \array_map([self::class, 'ord'], $chars);

        if ($u_style !== true) {
            return $code_points;
        }

        return \array_map([self::class, 'int_to_hex'], $code_points);
    }
916
917
    /**
     * Trims the string and replaces consecutive whitespace characters with a
     * single space. This includes tabs and newline characters, as well as
     * multibyte whitespace such as the thin space and ideographic space.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string with a trimmed $str and condensed whitespace
     */
    public static function collapse_whitespace(string $str): string
    {
        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $collapsed = (string) \mb_ereg_replace('[[:space:]]+', ' ', $str);
        } else {
            // no mbstring: go through the class' own regex helper
            $collapsed = self::regex_replace($str, '[[:space:]]+', ' ');
        }

        return \trim($collapsed);
    }
935
936
    /**
     * Returns count of characters used in a string.
     *
     * The string is split into single characters via UTF8::str_split() and the
     * occurrences of each character are counted.
     *
     * @param string $str                     <p>The input string.</p>
     * @param bool   $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool   $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                        "mb_" functions (passed through to UTF8::str_split()).</p>
     *
     * @return int[] an associative array of Character as keys and
     *               their count as values
     */
    public static function count_chars(
        string $str,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        $single_chars = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

        return \array_count_values($single_chars);
    }
960
961
    /**
     * Remove css media-queries.
     *
     * Every "@media ... { ... }" block matched by the pattern is replaced with an
     * empty string; a failed replacement results in an empty string.
     *
     * @param string $str <p>The CSS input.</p>
     *
     * @return string <p>The CSS without the matched media-query blocks.</p>
     */
    public static function css_stripe_media_queries(string $str): string
    {
        $media_query_pattern = '#@media\\s+(?:only\\s)?(?:[\\s{(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';
        $without_media_queries = \preg_replace($media_query_pattern, '', $str);

        return (string) $without_media_queries;
    }
976
977
    /**
     * Checks whether ctype is available on the server.
     *
     * @return bool
     *              <strong>true</strong> if the "ctype" extension is loaded, <strong>false</strong> otherwise
     */
    public static function ctype_loaded(): bool
    {
        $is_loaded = \extension_loaded('ctype');

        return $is_loaded;
    }
987
988
    /**
     * Converts an int value into a UTF-8 character.
     *
     * The value is wrapped into a numeric HTML entity ("&#<int>;") which is then
     * decoded via UTF8::html_entity_decode().
     *
     * @param mixed $int <p>The decimal code of the character.</p>
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        $numeric_entity = '&#' . $int . ';';

        return self::html_entity_decode($numeric_entity, \ENT_QUOTES | \ENT_HTML5);
    }
999
1000
    /**
     * Decodes a MIME header field
     *
     * Uses iconv_mime_decode() when iconv is flagged as available, otherwise
     * mb_decode_mimeheader().
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      A decoded MIME field on success,
     *                      or false if an error occurs during the decoding
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (self::$SUPPORT['iconv'] === true) {
            return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
        }

        // Fallback without iconv: for any non-UTF-8 target the input is run
        // through UTF8::encode() before it is handed to mbstring.
        $prepared = $encoding === 'UTF-8' ? $str : self::encode($encoding, $str);

        return \mb_decode_mimeheader($prepared);
    }
1026
1027
    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * @param string $str                            <p>The input string.</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               When <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode".</p>
     *
     * @return string
     */
    public static function emoji_decode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        self::initEmojiData();

        // Pick the key set that matches the mapping used while encoding.
        $encoded_keys = $use_reversible_string_mappings === true
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) $encoded_keys,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }
1057
1058
    /**
     * Encode a string with emoji chars into a non-emoji string.
     *
     * @param string $str                            <p>The input string</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               when <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode"</p>
     *
     * @return string
     */
    public static function emoji_encode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        self::initEmojiData();

        // Pick the key set the emoji values are replaced with.
        $replacement_keys = $use_reversible_string_mappings === true
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            (array) $replacement_keys,
            $str
        );
    }
1088
1089
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  This function will also try to fix broken / double encoding,
     *        so you can call this function also on a UTF-8 string and you don't mess up the string.
     *
     * Besides real charsets the pseudo encodings "JSON", "BASE64" and "HTML-ENTITIES"
     * are handled, both as target and as source.
     *
     * @param string $to_encoding                   <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                           <p>The input string</p>
     * @param bool   $auto_detect_the_from_encoding [optional] <p>When true, the current string-encoding is
     *                                              auto-detected (and we try to fix broken / double
     *                                              encoding for UTF-8); when false, $from_encoding is
     *                                              used if it is not empty.</p>
     * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                              A empty string will trigger the autodetect anyway.</p>
     *
     * @throws \InvalidArgumentException <p>If the target is "JSON" and the string can not be json-encoded.</p>
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $to_encoding,
        string $str,
        bool $auto_detect_the_from_encoding = true,
        string $from_encoding = ''
    ): string {
        // nothing to convert or no target given
        if ($str === '' || $to_encoding === '') {
            return $str;
        }

        if ($to_encoding !== 'UTF-8' && $to_encoding !== 'CP850') {
            $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
        }

        if ($from_encoding && $from_encoding !== 'UTF-8' && $from_encoding !== 'CP850') {
            $from_encoding = self::normalize_encoding($from_encoding, null);
        }

        // source and target are known and identical
        if ($to_encoding && $from_encoding && $from_encoding === $to_encoding) {
            return $str;
        }

        // pseudo encoding: JSON
        if ($to_encoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }
        if ($from_encoding === 'JSON') {
            $str = self::json_decode($str);
            $from_encoding = '';
        }

        // pseudo encoding: BASE64
        if ($to_encoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($from_encoding === 'BASE64') {
            $str = \base64_decode($str, true);
            $from_encoding = '';
        }

        // pseudo encoding: HTML-ENTITIES
        if ($to_encoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true, 'UTF-8');
        }
        if ($from_encoding === 'HTML-ENTITIES') {
            $str = self::html_entity_decode($str, \ENT_COMPAT, 'UTF-8');
            $from_encoding = '';
        }

        // detect the source encoding if requested or if none is left
        $detected_encoding = false;
        if ($auto_detect_the_from_encoding || !$from_encoding) {
            $detected_encoding = self::str_detect_encoding($str);
        }

        // DEBUG
        //var_dump($to_encoding, $from_encoding, $detected_encoding, $str, "\n\n");

        if ($detected_encoding !== false) {
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $from_encoding = $detected_encoding;
        } elseif ($auto_detect_the_from_encoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (!$from_encoding || $from_encoding === $to_encoding) {
            return $str;
        }

        // shortcuts which use the class' own converters
        if (
            $to_encoding === 'UTF-8'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'ISO-8859-1'], true)
        ) {
            return self::to_utf8($str);
        }

        if (
            $to_encoding === 'ISO-8859-1'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'UTF-8'], true)
        ) {
            return self::to_iso8859($str);
        }

        if (
            !\in_array($to_encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // warning: do not use the symfony polyfill here
            $converted = \mb_convert_encoding($str, $to_encoding, $from_encoding);

            if ($converted) {
                return $converted;
            }
        }

        // last resort: iconv, and the unchanged string if that fails as well
        $iconv_result = \iconv($from_encoding, $to_encoding, $str);

        return $iconv_result !== false ? $iconv_result : $str;
    }
1248
1249
    /**
     * Encodes a string as a MIME header field value via iconv_mime_encode().
     *
     * The field name passed to iconv is empty; only the value is supplied.
     *
     * @param string $str
     * @param string $from_charset      [optional] <p>Set the input charset.</p>
     * @param string $to_charset        [optional] <p>Set the output charset.</p>
     * @param string $transfer_encoding [optional] <p>Set the transfer encoding.</p>
     * @param string $linefeed          [optional] <p>Set the used linefeed, default: CRLF.</p>
     * @param int    $indent            [optional] <p>Set the max line length.</p>
     *
     * @return false|string
     *                      <p>An encoded MIME field on success,
     *                      or false if an error occurs during the encoding.</p>
     */
    public static function encode_mimeheader(
        $str,
        $from_charset = 'UTF-8',
        $to_charset = 'UTF-8',
        $transfer_encoding = 'Q',
        // Must be a real CRLF: the former single-quoted '\\r\\n' was the four
        // literal characters backslash, r, backslash, n and corrupted folded headers.
        $linefeed = "\r\n",
        $indent = 76
    ) {
        if ($from_charset !== 'UTF-8' && $from_charset !== 'CP850') {
            $from_charset = self::normalize_encoding($from_charset, 'UTF-8');
        }

        if ($to_charset !== 'UTF-8' && $to_charset !== 'CP850') {
            $to_charset = self::normalize_encoding($to_charset, 'UTF-8');
        }

        return \iconv_mime_encode(
            '',
            $str,
            [
                'scheme'           => $transfer_encoding,
                'line-length'      => $indent,
                'input-charset'    => $from_charset,
                'output-charset'   => $to_charset,
                'line-break-chars' => $linefeed,
            ]
        );
    }
1289
1290
    /**
     * Create an extract from a sentence, so if the search-string was found, it try to centered in the output.
     *
     * Cuts are made at the next space or period. A missing delimiter is ignored,
     * so a text that contains spaces but no period (or the other way round) is
     * still shortened.
     *
     * @param string   $str                       <p>The input string.</p>
     * @param string   $search                    <p>The searched string.</p>
     * @param int|null $length                    [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
     * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacer_for_skipped_text = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $is_utf8 = $encoding === 'UTF-8';
        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2, 0);
        }

        // Position of the first space or period at/after $offset, 0 if there is none.
        // "Not found" (false) results are dropped first: min(false, 7) is false, which
        // previously disabled the cut whenever only one of the two delimiters existed.
        $first_boundary = static function (int $offset) use ($str, $encoding, $is_utf8): int {
            if ($is_utf8) {
                $candidates = [
                    \mb_strpos($str, ' ', $offset),
                    \mb_strpos($str, '.', $offset),
                ];
            } else {
                $candidates = [
                    self::strpos($str, ' ', $offset, $encoding),
                    self::strpos($str, '.', $offset, $encoding),
                ];
            }

            $found = \array_filter($candidates, '\is_int');

            return $found === [] ? 0 : (int) \min($found);
        };

        // Part of $str in the active encoding (string, or false on failure).
        $cut = static function (int $start, int $cut_length) use ($str, $encoding, $is_utf8) {
            if ($is_utf8) {
                return \mb_substr($str, $start, $cut_length);
            }

            return self::substr($str, $start, $cut_length, $encoding);
        };

        // no search term: keep the beginning of the text only
        if ($search === '') {
            if ($length > 0) {
                $string_length = $is_utf8 ? (int) \mb_strlen($str) : (int) self::strlen($str, $encoding);
                $end = ($length - 1) > $string_length ? $string_length : ($length - 1);
            } else {
                $end = 0;
            }

            $pos = $first_boundary($end);
            if ($pos === 0) {
                return $str;
            }

            $str_sub = $cut(0, $pos);
            if ($str_sub === false) {
                return '';
            }

            return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
        }

        // $half_side is where the extract should start so that the match is centered
        if ($is_utf8) {
            $word_position = (int) \mb_stripos($str, $search);
            $half_side = (int) ($word_position - $length / 2 + (int) \mb_strlen($search) / 2);
        } else {
            $word_position = (int) self::stripos($str, $search, 0, $encoding);
            $half_side = (int) ($word_position - $length / 2 + (int) self::strlen($search, $encoding) / 2);
        }

        // move the start back to the last space or period before $half_side
        $pos_start = 0;
        if ($half_side > 0) {
            $half_text = $cut(0, $half_side);
            if ($half_text !== false) {
                if ($is_utf8) {
                    $pos_start = (int) \max(
                        \mb_strrpos($half_text, ' '),
                        \mb_strrpos($half_text, '.')
                    );
                } else {
                    $pos_start = (int) \max(
                        self::strrpos($half_text, ' ', 0, $encoding),
                        self::strrpos($half_text, '.', 0, $encoding)
                    );
                }
            }
        }

        if ($word_position && $half_side > 0) {
            // the match is far enough into the text: skip the beginning
            $offset = $pos_start + $length - 1;
            $real_length = (int) self::strlen($str, $encoding);

            if ($offset > $real_length) {
                $offset = $real_length;
            }

            $pos_end = $first_boundary($offset) - $pos_start;

            if ($pos_end <= 0) {
                // no boundary after the window: take everything up to the end
                $full_length = $is_utf8 ? (int) \mb_strlen($str) : (int) self::strlen($str, $encoding);
                $str_sub = $cut($pos_start, $full_length);

                if ($str_sub !== false) {
                    $extract = $replacer_for_skipped_text . \ltrim($str_sub, $trim_chars);
                } else {
                    $extract = '';
                }
            } else {
                $str_sub = $cut($pos_start, $pos_end);

                if ($str_sub !== false) {
                    $extract = $replacer_for_skipped_text . \trim($str_sub, $trim_chars) . $replacer_for_skipped_text;
                } else {
                    $extract = '';
                }
            }
        } else {
            // the match is at or near the beginning (or missing): keep the start
            $offset = $length - 1;
            $true_length = (int) self::strlen($str, $encoding);

            if ($offset > $true_length) {
                $offset = $true_length;
            }

            $pos_end = $first_boundary($offset);

            if ($pos_end) {
                $str_sub = $cut(0, $pos_end);

                if ($str_sub !== false) {
                    $extract = \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
                } else {
                    $extract = '';
                }
            } else {
                $extract = $str;
            }
        }

        return $extract;
    }
1477
1478
    /**
1479
     * Reads entire file into a string.
1480
     *
1481
     * WARNING: Do not use UTF-8 Option ($convert_to_utf8) for binary files (e.g.: images) !!!
1482
     *
1483
     * @see http://php.net/manual/en/function.file-get-contents.php
1484
     *
1485
     * @param string        $filename         <p>
1486
     *                                        Name of the file to read.
1487
     *                                        </p>
1488
     * @param bool          $use_include_path [optional] <p>
1489
     *                                        Prior to PHP 5, this parameter is called
1490
     *                                        use_include_path and is a bool.
1491
     *                                        As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1492
     *                                        to trigger include path
1493
     *                                        search.
1494
     *                                        </p>
1495
     * @param resource|null $context          [optional] <p>
1496
     *                                        A valid context resource created with
1497
     *                                        stream_context_create. If you don't need to use a
1498
     *                                        custom context, you can skip this parameter by &null;.
1499
     *                                        </p>
1500
     * @param int|null      $offset           [optional] <p>
1501
     *                                        The offset where the reading starts.
1502
     *                                        </p>
1503
     * @param int|null      $max_length       [optional] <p>
1504
     *                                        Maximum length of data read. The default is to read until end
1505
     *                                        of file is reached.
1506
     *                                        </p>
1507
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
1508
     * @param bool          $convert_to_utf8  <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
1509
     *                                        some files, because they used non default utf-8 chars. Binary files
1510
     *                                        like images or pdf will not be converted.</p>
1511
     * @param string        $from_encoding    [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
1512
     *                                        An empty string will trigger the autodetect anyway.</p>
1513
     *
1514
     * @return false|string
1515
     *                      <p>The function returns the read data as string or <b>false</b> on failure.</p>
1516
     */
1517 12
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $max_length = null,
        int $timeout = 10,
        bool $convert_to_utf8 = true,
        string $from_encoding = ''
    ) {
        // The file name is passed through the string sanitize filter before it is used.
        /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
        $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);
        if ($filename === false) {
            return false;
        }

        // Only build a stream context (carrying the http timeout) when the caller supplied none.
        if ($timeout && $context === null) {
            $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
        }

        // A missing offset means "start at the beginning"; the length is only
        // forwarded when the caller really passed an integer.
        $read_arguments = [$filename, $use_include_path, $context, $offset ?? 0];
        if (\is_int($max_length) === true) {
            $read_arguments[] = $max_length;
        }

        $data = \file_get_contents(...$read_arguments);

        // Reading failed, or the caller asked for the raw content: nothing more to do.
        if ($data === false) {
            return false;
        }

        if ($convert_to_utf8 !== true) {
            return $data;
        }

        // Content detected as binary is left untouched, unless it is detected as UTF-16 / UTF-32.
        $keep_raw_binary = self::is_binary($data, true) === true
                           &&
                           self::is_utf16($data, false) === false
                           &&
                           self::is_utf32($data, false) === false;

        if ($keep_raw_binary) {
            return $data;
        }

        return self::cleanup(
            self::encode('UTF-8', $data, false, $from_encoding)
        );
    }
1574
1575
    /**
1576
     * Checks if a file starts with BOM (Byte Order Mark) character.
1577
     *
1578
     * @param string $file_path <p>Path to a valid file.</p>
1579
     *
1580
     * @throws \RuntimeException if file_get_contents() returned false
1581
     *
1582
     * @return bool
1583
     *              <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p>
1584
     */
1585 2
    public static function file_has_bom(string $file_path): bool
    {
        // Uses the global file_get_contents(), not the wrapper method of this class.
        $raw_content = \file_get_contents($file_path);

        if ($raw_content !== false) {
            return self::string_has_bom($raw_content);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1594
1595
    /**
1596
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1597
     *
1598
     * @param mixed  $var
1599
     * @param int    $normalization_form
1600
     * @param string $leading_combining
1601
     *
1602
     * @return mixed
1603
     */
1604 62
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        $var_type = \gettype($var);

        // Arrays and objects: filter every element / iterable property recursively, in place.
        if ($var_type === 'array' || $var_type === 'object') {
            /** @noinspection ForeachSourceInspection */
            foreach ($var as &$element) {
                $element = self::filter($element, $normalization_form, $leading_combining);
            }
            unset($element);

            return $var;
        }

        // Every other non-string value is handed back untouched.
        if ($var_type !== 'string') {
            return $var;
        }

        if (\strpos($var, "\r") !== false) {
            // Workaround https://bugs.php.net/65732
            $var = self::normalize_line_ending($var);
        }

        // Only non-ASCII strings go through the normalization below.
        if (ASCII::is_ascii($var) !== false) {
            return $var;
        }

        if (\Normalizer::isNormalized($var, $normalization_form)) {
            // Placeholder: keeps the "isset($normalized[0])" check below true for already normalized input.
            $normalized = '-';
        } else {
            $normalized = \Normalizer::normalize($var, $normalization_form);

            // Use the normalizer result when it is a non-empty string, otherwise re-encode to UTF-8.
            $var = isset($normalized[0])
                ? $normalized
                : self::encode('UTF-8', $var, true);
        }

        $starts_with_combining_mark = $var[0] >= "\x80"
                                      &&
                                      isset($normalized[0], $leading_combining[0])
                                      &&
                                      \preg_match('/^\\p{Mn}/u', $var);

        if ($starts_with_combining_mark) {
            // Prevent leading combining chars
            // for NFC-safe concatenations.
            $var = $leading_combining . $var;
        }

        return $var;
    }
1664
1665
    /**
1666
     * "filter_input()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1667
     *
1668
     * Gets a specific external variable by name and optionally filters it
1669
     *
1670
     * @see http://php.net/manual/en/function.filter-input.php
1671
     *
1672
     * @param int    $type          <p>
1673
     *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1674
     *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1675
     *                              <b>INPUT_ENV</b>.
1676
     *                              </p>
1677
     * @param string $variable_name <p>
1678
     *                              Name of a variable to get.
1679
     *                              </p>
1680
     * @param int    $filter        [optional] <p>
1681
     *                              The ID of the filter to apply. The
1682
     *                              manual page lists the available filters.
1683
     *                              </p>
1684
     * @param mixed  $options       [optional] <p>
1685
     *                              Associative array of options or bitwise disjunction of flags. If filter
1686
     *                              accepts options, flags can be provided in "flags" field of array.
1687
     *                              </p>
1688
     *
1689
     * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
1690
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1691
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1692
     */
1693
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // The options are only forwarded to the native function when the caller really supplied some.
        $has_options = $options !== null && \func_num_args() >= 4;

        $raw_value = $has_options
            ? \filter_input($type, $variable_name, $filter, $options)
            : \filter_input($type, $variable_name, $filter);

        // Post-process the native result with the filter() method of this class.
        return self::filter($raw_value);
    }
1707
1708
    /**
1709
     * "filter_input_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1710
     *
1711
     * Gets external variables and optionally filters them
1712
     *
1713
     * @see http://php.net/manual/en/function.filter-input-array.php
1714
     *
1715
     * @param int   $type       <p>
1716
     *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1717
     *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1718
     *                          <b>INPUT_ENV</b>.
1719
     *                          </p>
1720
     * @param mixed $definition [optional] <p>
1721
     *                          An array defining the arguments. A valid key is a string
1722
     *                          containing a variable name and a valid value is either a filter type, or an array
1723
     *                          optionally specifying the filter, flags and options. If the value is an
1724
     *                          array, valid keys are filter which specifies the
1725
     *                          filter type,
1726
     *                          flags which specifies any flags that apply to the
1727
     *                          filter, and options which specifies any options that
1728
     *                          apply to the filter. See the example below for a better understanding.
1729
     *                          </p>
1730
     *                          <p>
1731
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1732
     *                          input array are filtered by this filter.
1733
     *                          </p>
1734
     * @param bool  $add_empty  [optional] <p>
1735
     *                          Add missing keys as <b>NULL</b> to the return value.
1736
     *                          </p>
1737
     *
1738
     * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1739
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1740
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
1741
     *               is not set and <b>NULL</b> if the filter fails.
1742
     */
1743
    public static function filter_input_array(
        int $type,
        $definition = null,
        bool $add_empty = true
    ) {
        // Without a definition the native function is called with the input type only.
        $has_definition = $definition !== null && \func_num_args() >= 2;

        $raw_values = $has_definition
            ? \filter_input_array($type, $definition, $add_empty)
            : \filter_input_array($type);

        // Post-process the native result with the filter() method of this class.
        return self::filter($raw_values);
    }
1756
1757
    /**
1758
     * "filter_var()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1759
     *
1760
     * Filters a variable with a specified filter
1761
     *
1762
     * @see http://php.net/manual/en/function.filter-var.php
1763
     *
1764
     * @param mixed $variable <p>
1765
     *                        Value to filter.
1766
     *                        </p>
1767
     * @param int   $filter   [optional] <p>
1768
     *                        The ID of the filter to apply. The
1769
     *                        manual page lists the available filters.
1770
     *                        </p>
1771
     * @param mixed $options  [optional] <p>
1772
     *                        Associative array of options or bitwise disjunction of flags. If filter
1773
     *                        accepts options, flags can be provided in "flags" field of array. For
1774
     *                        the "callback" filter, callable type should be passed. The
1775
     *                        callback must accept one argument, the value to be filtered, and return
1776
     *                        the value after filtering/sanitizing it.
1777
     *                        </p>
1778
     *                        <p>
1779
     *                        <code>
1780
     *                        // for filters that accept options, use this format
1781
     *                        $options = array(
1782
     *                        'options' => array(
1783
     *                        'default' => 3, // value to return if the filter fails
1784
     *                        // other options here
1785
     *                        'min_range' => 0
1786
     *                        ),
1787
     *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1788
     *                        );
1789
     *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1790
     *                        // for filter that only accept flags, you can pass them directly
1791
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1792
     *                        // for filter that only accept flags, you can also pass as an array
1793
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1794
     *                        array('flags' => FILTER_NULL_ON_FAILURE));
1795
     *                        // callback validate filter
1796
     *                        function foo($value)
1797
     *                        {
1798
     *                        // Expected format: Surname, GivenNames
1799
     *                        if (strpos($value, ", ") === false) return false;
1800
     *                        list($surname, $givennames) = explode(", ", $value, 2);
1801
     *                        $empty = (empty($surname) || empty($givennames));
1802
     *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1803
     *                        if ($empty || $notstrings) {
1804
     *                        return false;
1805
     *                        } else {
1806
     *                        return $value;
1807
     *                        }
1808
     *                        }
1809
     *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1810
     *                        </code>
1811
     *                        </p>
1812
     *
1813
     * @return mixed the filtered data, or <b>FALSE</b> if the filter fails
1814
     */
1815 2
    public static function filter_var(
        $variable,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // Treat an explicitly passed null exactly like an omitted argument, the same way
        // filter_input() of this class does: the native filter_var() documents $options as
        // "array|int", so a null must not be forwarded to it.
        // NOTE(review): on newer PHP versions forwarding null presumably triggers a deprecation
        // notice (or a TypeError under strict types) - confirm against the supported versions.
        if ($options === null) {
            $variable = \filter_var($variable, $filter);
        } else {
            $variable = \filter_var($variable, $filter, $options);
        }

        // Post-process the native result with the filter() method of this class.
        return self::filter($variable);
    }
1828
1829
    /**
1830
     * "filter_var_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1831
     *
1832
     * Gets multiple variables and optionally filters them
1833
     *
1834
     * @see http://php.net/manual/en/function.filter-var-array.php
1835
     *
1836
     * @param array $data       <p>
1837
     *                          An array with string keys containing the data to filter.
1838
     *                          </p>
1839
     * @param mixed $definition [optional] <p>
1840
     *                          An array defining the arguments. A valid key is a string
1841
     *                          containing a variable name and a valid value is either a
1842
     *                          filter type, or an
1843
     *                          array optionally specifying the filter, flags and options.
1844
     *                          If the value is an array, valid keys are filter
1845
     *                          which specifies the filter type,
1846
     *                          flags which specifies any flags that apply to the
1847
     *                          filter, and options which specifies any options that
1848
     *                          apply to the filter. See the example below for a better understanding.
1849
     *                          </p>
1850
     *                          <p>
1851
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1852
     *                          input array are filtered by this filter.
1853
     *                          </p>
1854
     * @param bool  $add_empty  [optional] <p>
1855
     *                          Add missing keys as <b>NULL</b> to the return value.
1856
     *                          </p>
1857
     *
1858
     * @return mixed an array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1859
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1860
     *               set
1861
     */
1862 2
    public static function filter_var_array(
        array $data,
        $definition = null,
        bool $add_empty = true
    ) {
        // Treat an explicitly passed null exactly like an omitted argument, the same way
        // filter_input_array() of this class does: the native filter_var_array() expects an
        // array or a filter constant as definition, so a null must not be forwarded to it
        // (e.g. when a caller only wants to set $add_empty).
        if ($definition === null) {
            $a = \filter_var_array($data);
        } else {
            $a = \filter_var_array($data, $definition, $add_empty);
        }

        // Post-process the native result with the filter() method of this class.
        return self::filter($a);
    }
1875
1876
    /**
1877
     * Checks whether finfo is available on the server.
1878
     *
1879
     * @return bool
1880
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
1881
     */
1882
    public static function finfo_loaded(): bool
    {
        // Availability is decided purely by the existence of the "finfo" class.
        $finfo_class_exists = \class_exists('finfo');

        return $finfo_class_exists;
    }
1886
1887
    /**
1888
     * Returns the first $n characters of the string.
1889
     *
1890
     * @param string $str      <p>The input string.</p>
1891
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
1892
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1893
     *
1894
     * @return string
1895
     */
1896 13
    public static function first_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to extract for a non-positive length or an empty input.
        if ($n <= 0 || $str === '') {
            return '';
        }

        // UTF-8 goes straight to mb_substr(); any other encoding goes through substr() of this class.
        $leading_chars = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $leading_chars;
    }
1911
1912
    /**
1913
     * Check if the number of Unicode characters isn't greater than the specified integer.
1914
     *
1915
     * @param string $str      the original string to be checked
1916
     * @param int    $box_size the size in number of chars to be checked against string
1917
     *
1918
     * @return bool true if string is less than or equal to $box_size, false otherwise
1919
     */
1920 2
    public static function fits_inside(string $str, int $box_size): bool
    {
        // Length as reported by strlen() of this class, forced to an integer.
        $char_count = (int) self::strlen($str);

        return $box_size >= $char_count;
    }
1924
1925
    /**
1926
     * Try to fix simple broken UTF-8 strings.
1927
     *
1928
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1929
     *
1930
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1931
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1932
     * See: http://en.wikipedia.org/wiki/Windows-1252
1933
     *
1934
     * @param string $str <p>The input string</p>
1935
     *
1936
     * @return string
1937
     */
1938 46
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // Search / replace lists are built on the first call and reused afterwards.
        static $BROKEN_UTF8_REPLACEMENT_CACHE = null;

        if ($BROKEN_UTF8_REPLACEMENT_CACHE === null) {
            // Lazy-load the "utf8_fix" data: its keys are searched for, its values are the replacements.
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $BROKEN_UTF8_REPLACEMENT_CACHE = [
                'search'  => \array_keys(self::$BROKEN_UTF8_FIX),
                'replace' => \array_values(self::$BROKEN_UTF8_FIX),
            ];
        }

        return \str_replace(
            $BROKEN_UTF8_REPLACEMENT_CACHE['search'],
            $BROKEN_UTF8_REPLACEMENT_CACHE['replace'],
            $str
        );
    }
1958
1959
    /**
1960
     * Fix a double (or multiple) encoded UTF8 string.
1961
     *
1962
     * @param string|string[] $str you can use a string or an array of strings
1963
     *
1964
     * @return string|string[]
1965
     *                         Will return the fixed input-"array" or
1966
     *                         the fixed input-"string"
1967
     *
1968
     * @psalm-suppress InvalidReturnType
1969
     */
1970 2
    public static function fix_utf8($str)
    {
        // Arrays are processed element by element (recursively), in place.
        if (\is_array($str) === true) {
            foreach ($str as &$element) {
                $element = self::fix_utf8($element);
            }
            unset($element);

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $current = (string) $str;

        // Decode + re-encode repeatedly until a pass no longer changes the string.
        if ($current !== '') {
            do {
                $previous = $current;

                /**
                 * @psalm-suppress PossiblyInvalidArgument
                 */
                $current = self::to_utf8(
                    self::utf8_decode($previous, true)
                );
            } while ($previous !== $current);
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }
2001
2002
    /**
2003
     * Get the text direction ('RTL' or 'LTR') of a specific character.
2004
     *
2005
     * @param string $char
2006
     *
2007
     * @return string 'RTL' or 'LTR'
2008
     */
2009 2
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_direction = \IntlChar::charDirection($char);

            // direction codes from the "IntlChar"-Class
            if (\in_array($intl_direction, [0, 11, 12, 20], true)) {
                return 'LTR';
            }

            if (\in_array($intl_direction, [1, 13, 14, 15, 21], true)) {
                return 'RTL';
            }

            // Any other code falls through to the code point table below.
        }

        $code_point = static::chr_to_decimal($char);

        // Everything outside of the range covered by the table is treated as left-to-right.
        if ($code_point < 0x5be || $code_point > 0x10b7f) {
            return 'LTR';
        }

        // Single code points that are right-to-left (compared strictly).
        $rtl_code_points = [
            0x5be, 0x5c0, 0x5c3, 0x5c6,
            0x608, 0x60b, 0x60d, 0x61b,
            0x710, 0x7b1, 0x7fa,
            0x81a, 0x824, 0x828, 0x85e,
            0x200f,
            0xfb1d, 0xfb3e,
            0x10808, 0x1083c, 0x1093f, 0x10a00,
        ];

        if (\in_array($code_point, $rtl_code_points, true)) {
            return 'RTL';
        }

        // Inclusive [first, last] ranges of right-to-left code points.
        $rtl_code_point_ranges = [
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            [0x800, 0x815],
            [0x830, 0x83e],
            [0x840, 0x858],
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            [0x10b78, 0x10b7f],
        ];

        foreach ($rtl_code_point_ranges as $range) {
            if ($code_point >= $range[0] && $code_point <= $range[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2115
2116
    /**
2117
     * Check for php-support.
2118
     *
2119
     * @param string|null $key
2120
     *
2121
     * @return mixed
2122
     *               Return the full support-"array", if $key === null<br>
2123
     *               return bool-value, if $key is used and available<br>
2124
     *               otherwise return <strong>null</strong>
2125
     */
2126 27
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            // Lazy-load the transliterator list data on first use.
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            // compatibility fix for old versions
            self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

            // Unknown (or null-valued) keys yield null.
            return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
        }

        // No key requested: hand back the complete support-"array".
        return self::$SUPPORT;
    }
2140
2141
    /**
2142
     * Warning: this method only works for some file-types (png, jpg)
2143
     *          if you need more supported types, please use e.g. "finfo"
2144
     *
2145
     * @param string $str
2146
     * @param array  $fallback <p>with these keys: 'ext', 'mime', 'type'</p>
2147
     *
2148
     * @return array<string, string|null>
2149
     *                       <p>with these keys: 'ext', 'mime', 'type'</p>
2150
     */
2151 39
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        // At least two bytes are needed to inspect the signature.
        if (\strlen($str) < 2) {
            return $fallback;
        }

        // The type code is the decimal value of the first byte followed by the decimal
        // value of the second byte, e.g. 0x89 0x50 => "137" . "80" => 13780.
        $type_code = (int) (\ord($str[0]) . \ord($str[1]));

        //
        // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
        //
        // WARNING: do not add too simple comparisons, because of false-positive results:
        //
        // 3780 => 'pdf', 7790 => 'exe', 7784 => 'midi', 8075 => 'zip',
        // 8297 => 'rar', 7173 => 'gif', 7373 => 'tiff' 6677 => 'bmp', ...
        //
        $known_types = [
            255216 => [
                'ext'  => 'jpg',
                'mime' => 'image/jpeg',
                'type' => 'binary',
            ],
            13780 => [
                'ext'  => 'png',
                'mime' => 'image/png',
                'type' => 'binary',
            ],
        ];

        if (isset($known_types[$type_code])) {
            return $known_types[$type_code];
        }

        return $fallback;
    }
2214
2215
    /**
     * Build a random string by picking single characters from a pool, one at a time.
     *
     * @param int    $length         <p>Number of characters the result should contain.</p>
     * @param string $possible_chars [optional] <p>Pool of characters used for the random selection.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The random string, or an empty string if the character pool is empty.</p>
     */
    public static function get_random_string(
        int $length,
        string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        string $encoding = 'UTF-8'
    ): string {
        // pick an index in [0, $last_index]: try random_int() first, use mt_rand() if it throws
        $random_index = static function (int $last_index): int {
            try {
                return \random_int(0, $last_index);
            } catch (\Exception $e) {
                /** @noinspection RandomApiMigrationInspection */
                return \mt_rand(0, $last_index);
            }
        };

        $result = '';
        $picked = 0;

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $pool_size = (int) self::strlen($possible_chars, $encoding);
            if ($pool_size === 0) {
                return '';
            }

            // only successfully extracted chars count towards the requested length
            while ($picked < $length) {
                $char = self::substr($possible_chars, $random_index($pool_size - 1), 1, $encoding);
                if ($char !== false) {
                    $result .= $char;
                    ++$picked;
                }
            }

            return $result;
        }

        // fast path for the default encoding: call the "mb_" functions directly
        $pool_size = (int) \mb_strlen($possible_chars);
        if ($pool_size === 0) {
            return '';
        }

        // only successfully extracted chars count towards the requested length
        while ($picked < $length) {
            $char = \mb_substr($possible_chars, $random_index($pool_size - 1), 1);
            if ($char !== false) {
                $result .= $char;
                ++$picked;
            }
        }

        return $result;
    }
2279
2280
    /**
     * Generate a unique string from a random number, the session id, the
     * remote / server address (if available) and optional extra entropy.
     *
     * INFO: the result is built via "uniqid()" (and optionally "md5()"), so it is
     *       meant as a unique identifier and should not be treated as a secret token.
     *
     * @param int|string $entropy_extra [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $use_md5       [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     *                <p>A 32 char md5-hash if $use_md5 is true, otherwise the raw "uniqid()" result.</p>
     */
    public static function get_unique_string($entropy_extra = '', bool $use_md5 = true): string
    {
        // random_int() throws if no suitable source of randomness is available,
        // so fall back to mt_rand() like "UTF8::get_random_string()" does
        try {
            $rand_int = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            /** @noinspection RandomApiMigrationInspection */
            $rand_int = \mt_rand(0, \mt_getrandmax());
        }

        $unique_helper = $rand_int .
                         \session_id() .
                         ($_SERVER['REMOTE_ADDR'] ?? '') .
                         ($_SERVER['SERVER_ADDR'] ?? '') .
                         $entropy_extra;

        // "more_entropy" is enabled, so the value is more likely to be unique
        $unique_string = \uniqid($unique_helper, true);

        if ($use_md5) {
            $unique_string = \md5($unique_string . $unique_helper);
        }

        return $unique_string;
    }
2302
2303
    /**
     * Deprecated alias which forwards to "UTF8::string_has_bom()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>The result of "UTF8::string_has_bom()" for the given string.</p>
     *
     * @see UTF8::string_has_bom()
     * @deprecated <p>please use "UTF8::string_has_bom()"</p>
     */
    public static function hasBom(string $str): bool
    {
        $has_bom = self::string_has_bom($str);

        return $has_bom;
    }
2317
2318
    /**
     * Check whether the string contains at least one lower case character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p><strong>true</strong> if a lower case character was found, <strong>false</strong> otherwise.</p>
     */
    public static function has_lowercase(string $str): bool
    {
        // any prefix, followed by one char of the POSIX "lower" class
        $pattern = '.*[[:lower:]]';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2335
2336
    /**
     * Check whether the string contains at least one upper case character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p><strong>true</strong> if an upper case character was found, <strong>false</strong> otherwise.</p>
     */
    public static function has_uppercase(string $str): bool
    {
        // any prefix, followed by one char of the POSIX "upper" class
        $pattern = '.*[[:upper:]]';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2352
2353
    /**
     * Convert a hexadecimal value into a character via "UTF8::decimal_to_chr()".
     *
     * @param string $hexdec <p>The hexadecimal value.</p>
     *
     * @return false|string one single UTF-8 character
     */
    public static function hex_to_chr(string $hexdec)
    {
        // hex string -> decimal code point
        $code_point = \hexdec($hexdec);

        return self::decimal_to_chr($code_point);
    }
2364
2365
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * Accepted input: 4 to 6 hexadecimal digits, optionally prefixed by "\u" or "U+",
     * e.g. "U+00f1", "\u2026" or "6e05".
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * @param string $hexdec <p>The hexadecimal code point representation.</p>
     *
     * @return false|int the code point, or false on failure
     */
    public static function hex_to_int($hexdec)
    {
        // init
        $hexdec = (string) $hexdec;

        if ($hexdec === '') {
            return false;
        }

        // only real hex digits are allowed: otherwise intval() would silently
        // return 0 (or a partial value) for input like "zzzz" or "12xy"
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexdec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2389
2390
    /**
     * Deprecated alias which forwards to "UTF8::html_entity_decode()".
     *
     * @param string   $str      <p>The input string.</p>
     * @param int|null $flags    [optional] <p>Passed through unchanged.</p>
     * @param string   $encoding [optional] <p>Passed through unchanged.</p>
     *
     * @return string
     *                <p>The result of "UTF8::html_entity_decode()".</p>
     *
     * @see UTF8::html_entity_decode()
     * @deprecated <p>please use "UTF8::html_entity_decode()"</p>
     */
    public static function html_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2409
2410
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * With mbstring support the conversion is done via "mb_encode_numericentity()",
     * otherwise every char is encoded via "UTF8::single_chr_html_encode()".
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * @param string $str              <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keep_ascii_chars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(
        string $str,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // with "keep ASCII" the conversion starts right after the ASCII range
            $start_code = $keep_ascii_chars === true ? 0x80 : 0x00;

            // a convmap is a list of [start, end, offset, mask] groups, so its size
            // must be a multiple of 4 (PHP 8 throws a ValueError otherwise)
            $convmap = [$start_code, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                /** @var false|string|null $return - needed for PhpStan (stubs error) */
                $return = \mb_encode_numericentity($str, $convmap);
                if ($return !== null && $return !== false) {
                    return $return;
                }
            }

            /** @var false|string|null $return - needed for PhpStan (stubs error) */
            $return = \mb_encode_numericentity($str, $convmap, $encoding);
            if ($return !== null && $return !== false) {
                return $return;
            }
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keep_ascii_chars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2477
2478
    /**
2479
     * UTF-8 version of html_entity_decode()
2480
     *
2481
     * The reason we are not using html_entity_decode() by itself is because
2482
     * while it is not technically correct to leave out the semicolon
2483
     * at the end of an entity most browsers will still interpret the entity
2484
     * correctly. html_entity_decode() does not convert entities without
2485
     * semicolons, so we are left with our own little solution here. Bummer.
2486
     *
2487
     * Convert all HTML entities to their applicable characters
2488
     *
2489
     * INFO: opposite to UTF8::html_encode()
2490
     *
2491
     * @see http://php.net/manual/en/function.html-entity-decode.php
2492
     *
2493
     * @param string $str      <p>
2494
     *                         The input string.
2495
     *                         </p>
2496
     * @param int    $flags    [optional] <p>
2497
     *                         A bitmask of one or more of the following flags, which specify how to handle quotes
2498
     *                         and which document type to use. If null, ENT_QUOTES | ENT_HTML5 is used.
2499
     *                         <table>
2500
     *                         Available <i>flags</i> constants
2501
     *                         <tr valign="top">
2502
     *                         <td>Constant Name</td>
2503
     *                         <td>Description</td>
2504
     *                         </tr>
2505
     *                         <tr valign="top">
2506
     *                         <td><b>ENT_COMPAT</b></td>
2507
     *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2508
     *                         </tr>
2509
     *                         <tr valign="top">
2510
     *                         <td><b>ENT_QUOTES</b></td>
2511
     *                         <td>Will convert both double and single quotes.</td>
2512
     *                         </tr>
2513
     *                         <tr valign="top">
2514
     *                         <td><b>ENT_NOQUOTES</b></td>
2515
     *                         <td>Will leave both double and single quotes unconverted.</td>
2516
     *                         </tr>
2517
     *                         <tr valign="top">
2518
     *                         <td><b>ENT_HTML401</b></td>
2519
     *                         <td>
2520
     *                         Handle code as HTML 4.01.
2521
     *                         </td>
2522
     *                         </tr>
2523
     *                         <tr valign="top">
2524
     *                         <td><b>ENT_XML1</b></td>
2525
     *                         <td>
2526
     *                         Handle code as XML 1.
2527
     *                         </td>
2528
     *                         </tr>
2529
     *                         <tr valign="top">
2530
     *                         <td><b>ENT_XHTML</b></td>
2531
     *                         <td>
2532
     *                         Handle code as XHTML.
2533
     *                         </td>
2534
     *                         </tr>
2535
     *                         <tr valign="top">
2536
     *                         <td><b>ENT_HTML5</b></td>
2537
     *                         <td>
2538
     *                         Handle code as HTML 5.
2539
     *                         </td>
2540
     *                         </tr>
2541
     *                         </table>
2542
     *                         </p>
2543
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2544
     *
2545
     * @return string the decoded string
2546
     */
2547 50
    public static function html_entity_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to decode: shorter than 4 bytes (examples: &; || &x;) or no "&" at all
        if (!isset($str[3]) || \strpos($str, '&') === false) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        // warn about every other encoding if mbstring is not available
        $is_basic_encoding = $encoding === 'UTF-8'
                             ||
                             $encoding === 'ISO-8859-1'
                             ||
                             $encoding === 'WINDOWS-1252';
        if (!$is_basic_encoding && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // decode again and again until the string is stable, so that nested
        // entities like "&amp;lt;" are resolved completely
        do {
            $previous = $str;

            if (\strpos($str, '&') === false) {
                break;
            }

            if (\strpos($str, '&#') !== false) {
                // decode also numeric & UTF16 two byte entities:
                // append the missing ";" so that html_entity_decode() can handle them
                $str = (string) \preg_replace(
                    '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                    '$1;',
                    $str
                );
            }

            $str = \html_entity_decode($str, $flags, $encoding);
        } while ($previous !== $str);

        return $str;
    }
2603
2604
    /**
     * Escape a string for html output via "UTF8::htmlspecialchars()",
     * using the flags ENT_QUOTES | ENT_SUBSTITUTE.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The escaped string.</p>
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }
2620
2621
    /**
     * Remove empty html-tags.
     *
     * e.g.: <tag></tag>
     *
     * INFO: An opening tag which is directly followed (only whitespace in between)
     *       by a closing tag is removed; the tag names are not compared.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *                <p>The string without empty tags, or the unchanged input if the regex fails.</p>
     */
    public static function html_stripe_empty_tags(string $str): string
    {
        $result = \preg_replace(
            '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u',
            '',
            $str
        );

        // preg_replace() returns null on failure (e.g. malformed UTF-8 with the "u" modifier):
        // keep the input instead of silently returning an empty string
        return $result ?? $str;
    }
2638
2639
    /**
2640
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2641
     *
2642
     * @see http://php.net/manual/en/function.htmlentities.php
2643
     *
2644
     * @param string $str           <p>
2645
     *                              The input string.
2646
     *                              </p>
2647
     * @param int    $flags         [optional] <p>
2648
     *                              A bitmask of one or more of the following flags, which specify how to handle
2649
     *                              quotes, invalid code unit sequences and the used document type. The default is
2650
     *                              ENT_COMPAT | ENT_HTML401.
2651
     *                              <table>
2652
     *                              Available <i>flags</i> constants
2653
     *                              <tr valign="top">
2654
     *                              <td>Constant Name</td>
2655
     *                              <td>Description</td>
2656
     *                              </tr>
2657
     *                              <tr valign="top">
2658
     *                              <td><b>ENT_COMPAT</b></td>
2659
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2660
     *                              </tr>
2661
     *                              <tr valign="top">
2662
     *                              <td><b>ENT_QUOTES</b></td>
2663
     *                              <td>Will convert both double and single quotes.</td>
2664
     *                              </tr>
2665
     *                              <tr valign="top">
2666
     *                              <td><b>ENT_NOQUOTES</b></td>
2667
     *                              <td>Will leave both double and single quotes unconverted.</td>
2668
     *                              </tr>
2669
     *                              <tr valign="top">
2670
     *                              <td><b>ENT_IGNORE</b></td>
2671
     *                              <td>
2672
     *                              Silently discard invalid code unit sequences instead of returning
2673
     *                              an empty string. Using this flag is discouraged as it
2674
     *                              may have security implications.
2675
     *                              </td>
2676
     *                              </tr>
2677
     *                              <tr valign="top">
2678
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2679
     *                              <td>
2680
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2681
     *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty
2682
     *                              string.
2683
     *                              </td>
2684
     *                              </tr>
2685
     *                              <tr valign="top">
2686
     *                              <td><b>ENT_DISALLOWED</b></td>
2687
     *                              <td>
2688
     *                              Replace invalid code points for the given document type with a
2689
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2690
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2691
     *                              instance, to ensure the well-formedness of XML documents with
2692
     *                              embedded external content.
2693
     *                              </td>
2694
     *                              </tr>
2695
     *                              <tr valign="top">
2696
     *                              <td><b>ENT_HTML401</b></td>
2697
     *                              <td>
2698
     *                              Handle code as HTML 4.01.
2699
     *                              </td>
2700
     *                              </tr>
2701
     *                              <tr valign="top">
2702
     *                              <td><b>ENT_XML1</b></td>
2703
     *                              <td>
2704
     *                              Handle code as XML 1.
2705
     *                              </td>
2706
     *                              </tr>
2707
     *                              <tr valign="top">
2708
     *                              <td><b>ENT_XHTML</b></td>
2709
     *                              <td>
2710
     *                              Handle code as XHTML.
2711
     *                              </td>
2712
     *                              </tr>
2713
     *                              <tr valign="top">
2714
     *                              <td><b>ENT_HTML5</b></td>
2715
     *                              <td>
2716
     *                              Handle code as HTML 5.
2717
     *                              </td>
2718
     *                              </tr>
2719
     *                              </table>
2720
     *                              </p>
2721
     * @param string $encoding      [optional] <p>
2722
     *                              Like <b>htmlspecialchars</b>,
2723
     *                              <b>htmlentities</b> takes an optional third argument
2724
     *                              <i>encoding</i> which defines encoding used in
2725
     *                              conversion.
2726
     *                              Although this argument is technically optional, you are highly
2727
     *                              encouraged to specify the correct value for your code.
2728
     *                              </p>
2729
     * @param bool   $double_encode [optional] <p>
2730
     *                              When <i>double_encode</i> is turned off PHP will not
2731
     *                              encode existing html entities. The default is to convert everything.
2732
     *                              </p>
2733
     *
2734
     * @return string
2735
     *                <p>
2736
     *                The encoded string.
2737
     *                <br><br>
2738
     *                If the input <i>string</i> contains an invalid code unit
2739
     *                sequence within the given <i>encoding</i> an empty string
2740
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2741
     *                <b>ENT_SUBSTITUTE</b> flags are set.
2742
     *                </p>
2743
     */
2744 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        $needs_normalization = $encoding !== 'UTF-8' && $encoding !== 'CP850';
        if ($needs_normalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        /**
         * PHP does not convert a backslash into its html entity, because the backslash
         * is mostly used to escape characters before inserting them into a database.
         * With a decent database layer this is not needed, so the backslash is
         * replaced by its html entity equivalent here.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace('\\', '&#92;', $encoded);

        // finally convert the remaining non-ASCII chars into numbered entities
        return self::html_encode($encoded, true, $encoding);
    }
2773
2774
    /**
2775
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2776
     *
2777
     * INFO: Take a look at "UTF8::htmlentities()"
2778
     *
2779
     * @see http://php.net/manual/en/function.htmlspecialchars.php
2780
     *
2781
     * @param string $str           <p>
2782
     *                              The string being converted.
2783
     *                              </p>
2784
     * @param int    $flags         [optional] <p>
2785
     *                              A bitmask of one or more of the following flags, which specify how to handle
2786
     *                              quotes, invalid code unit sequences and the used document type. The default is
2787
     *                              ENT_COMPAT | ENT_HTML401.
2788
     *                              <table>
2789
     *                              Available <i>flags</i> constants
2790
     *                              <tr valign="top">
2791
     *                              <td>Constant Name</td>
2792
     *                              <td>Description</td>
2793
     *                              </tr>
2794
     *                              <tr valign="top">
2795
     *                              <td><b>ENT_COMPAT</b></td>
2796
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2797
     *                              </tr>
2798
     *                              <tr valign="top">
2799
     *                              <td><b>ENT_QUOTES</b></td>
2800
     *                              <td>Will convert both double and single quotes.</td>
2801
     *                              </tr>
2802
     *                              <tr valign="top">
2803
     *                              <td><b>ENT_NOQUOTES</b></td>
2804
     *                              <td>Will leave both double and single quotes unconverted.</td>
2805
     *                              </tr>
2806
     *                              <tr valign="top">
2807
     *                              <td><b>ENT_IGNORE</b></td>
2808
     *                              <td>
2809
     *                              Silently discard invalid code unit sequences instead of returning
2810
     *                              an empty string. Using this flag is discouraged as it
2811
     *                              may have security implications.
2812
     *                              </td>
2813
     *                              </tr>
2814
     *                              <tr valign="top">
2815
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2816
     *                              <td>
2817
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2818
     *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty
2819
     *                              string.
2820
     *                              </td>
2821
     *                              </tr>
2822
     *                              <tr valign="top">
2823
     *                              <td><b>ENT_DISALLOWED</b></td>
2824
     *                              <td>
2825
     *                              Replace invalid code points for the given document type with a
2826
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2827
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2828
     *                              instance, to ensure the well-formedness of XML documents with
2829
     *                              embedded external content.
2830
     *                              </td>
2831
     *                              </tr>
2832
     *                              <tr valign="top">
2833
     *                              <td><b>ENT_HTML401</b></td>
2834
     *                              <td>
2835
     *                              Handle code as HTML 4.01.
2836
     *                              </td>
2837
     *                              </tr>
2838
     *                              <tr valign="top">
2839
     *                              <td><b>ENT_XML1</b></td>
2840
     *                              <td>
2841
     *                              Handle code as XML 1.
2842
     *                              </td>
2843
     *                              </tr>
2844
     *                              <tr valign="top">
2845
     *                              <td><b>ENT_XHTML</b></td>
2846
     *                              <td>
2847
     *                              Handle code as XHTML.
2848
     *                              </td>
2849
     *                              </tr>
2850
     *                              <tr valign="top">
2851
     *                              <td><b>ENT_HTML5</b></td>
2852
     *                              <td>
2853
     *                              Handle code as HTML 5.
2854
     *                              </td>
2855
     *                              </tr>
2856
     *                              </table>
2857
     *                              </p>
2858
     * @param string $encoding      [optional] <p>
2859
     *                              Defines encoding used in conversion.
2860
     *                              </p>
2861
     *                              <p>
2862
     *                              For the purposes of this function, the encodings
2863
     *                              ISO-8859-1, ISO-8859-15,
2864
     *                              UTF-8, cp866,
2865
     *                              cp1251, cp1252, and
2866
     *                              KOI8-R are effectively equivalent, provided the
2867
     *                              <i>string</i> itself is valid for the encoding, as
2868
     *                              the characters affected by <b>htmlspecialchars</b> occupy
2869
     *                              the same positions in all of these encodings.
2870
     *                              </p>
2871
     * @param bool   $double_encode [optional] <p>
2872
     *                              When <i>double_encode</i> is turned off PHP will not
2873
     *                              encode existing html entities, the default is to convert everything.
2874
     *                              </p>
2875
     *
2876
     * @return string
     *                <p>
     *                The converted string.
     *                <br><br>
     *                If the input <i>string</i> contains an invalid code unit
     *                sequence within the given <i>encoding</i> an empty string
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
     *                <b>ENT_SUBSTITUTE</b> flags are set.
     *                </p>
2883
     */
2884 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are passed through as they are, everything else is normalized first
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $converted;
    }
2901
2902
    /**
     * Tell whether the "iconv" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function iconv_loaded(): bool
    {
        $extension = 'iconv';

        return \extension_loaded($extension);
    }
2912
2913
    /**
     * Deprecated alias of "UTF8::decimal_to_chr()".
     *
     * @param mixed $int <p>Forwarded unchanged to "UTF8::decimal_to_chr()".</p>
     *
     * @return string
     *                <p>Whatever "UTF8::decimal_to_chr()" returns for $int.</p>
     *
     * @see UTF8::decimal_to_chr()
     * @deprecated <p>please use "UTF8::decimal_to_chr()"</p>
     */
    public static function int_to_chr($int): string
    {
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
2927
2928
    /**
     * Converts an integer into its hexadecimal code point notation, e.g. 65 => "U+0041".
     *
     * INFO: opposite to UTF8::hex_to_int()
     *
     * @param int    $int    <p>The integer to be converted to hexadecimal code point.</p>
     * @param string $prefix [optional] <p>Text put in front of the hex digits, "U+" by default.</p>
     *
     * @return string
     *                <p>$prefix followed by the hex digits produced by dechex(),
     *                left-padded with "0" to at least four digits.</p>
     */
    public static function int_to_hex(int $int, string $prefix = 'U+'): string
    {
        // values shorter than four hex digits are zero-padded on the left,
        // longer values are kept as they are
        $digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $prefix . $digits;
    }
2946
2947
    /**
     * Reports whether the "IntlChar" class exists (class_exists() check).
     *
     * @return bool
     *              <strong>true</strong> if the class exists, <strong>false</strong> otherwise
     */
    public static function intlChar_loaded(): bool
    {
        $has_intl_char = \class_exists('IntlChar');

        return $has_intl_char;
    }
2957
2958
    /**
     * Reports whether the PHP "intl" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise
     */
    public static function intl_loaded(): bool
    {
        $is_loaded = \extension_loaded('intl');

        return $is_loaded;
    }
2968
2969
    /**
     * Deprecated alias of "UTF8::is_ascii()".
     *
     * The check itself is delegated to "ASCII::is_ascii()".
     *
     * @param string $str <p>The string to check.</p>
     *
     * @return bool
     *
     * @see UTF8::is_ascii()
     * @deprecated <p>please use "UTF8::is_ascii()"</p>
     */
    public static function isAscii(string $str): bool
    {
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
2983
2984
    /**
     * Deprecated alias of "UTF8::is_base64()".
     *
     * @param string $str <p>Forwarded unchanged to "UTF8::is_base64()".</p>
     *
     * @return bool
     *
     * @see UTF8::is_base64()
     * @deprecated <p>please use "UTF8::is_base64()"</p>
     */
    public static function isBase64($str): bool
    {
        $is_base64 = self::is_base64($str);

        return $is_base64;
    }
2998
2999
    /**
     * Deprecated alias of "UTF8::is_binary()".
     *
     * @param mixed $str    <p>Forwarded unchanged to "UTF8::is_binary()".</p>
     * @param bool  $strict <p>Forwarded unchanged to "UTF8::is_binary()".</p>
     *
     * @return bool
     *
     * @see UTF8::is_binary()
     * @deprecated <p>please use "UTF8::is_binary()"</p>
     */
    public static function isBinary($str, $strict = false): bool
    {
        $is_binary = self::is_binary($str, $strict);

        return $is_binary;
    }
3014
3015
    /**
     * Deprecated alias of "UTF8::is_bom()".
     *
     * @param string $utf8_chr <p>Forwarded unchanged to "UTF8::is_bom()".</p>
     *
     * @return bool
     *
     * @see UTF8::is_bom()
     * @deprecated <p>please use "UTF8::is_bom()"</p>
     */
    public static function isBom(string $utf8_chr): bool
    {
        $is_bom = self::is_bom($utf8_chr);

        return $is_bom;
    }
3029
3030
    /**
     * Deprecated alias of "UTF8::is_html()".
     *
     * @param string $str <p>Forwarded unchanged to "UTF8::is_html()".</p>
     *
     * @return bool
     *
     * @see UTF8::is_html()
     * @deprecated <p>please use "UTF8::is_html()"</p>
     */
    public static function isHtml(string $str): bool
    {
        $is_html = self::is_html($str);

        return $is_html;
    }
3044
3045
    /**
     * Deprecated alias of "UTF8::is_json()".
     *
     * @param string $str <p>Forwarded unchanged to "UTF8::is_json()".</p>
     *
     * @return bool
     *
     * @see UTF8::is_json()
     * @deprecated <p>please use "UTF8::is_json()"</p>
     */
    public static function isJson(string $str): bool
    {
        $is_json = self::is_json($str);

        return $is_json;
    }
3059
3060
    /**
     * Deprecated alias of "UTF8::is_utf16()".
     *
     * @param mixed $str <p>Forwarded unchanged to "UTF8::is_utf16()".</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     *
     * @see UTF8::is_utf16()
     * @deprecated <p>please use "UTF8::is_utf16()"</p>
     */
    public static function isUtf16($str)
    {
        $result = self::is_utf16($str);

        return $result;
    }
3077
3078
    /**
     * Deprecated alias of "UTF8::is_utf32()".
     *
     * @param mixed $str <p>Forwarded unchanged to "UTF8::is_utf32()".</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     *
     * @see UTF8::is_utf32()
     * @deprecated <p>please use "UTF8::is_utf32()"</p>
     */
    public static function isUtf32($str)
    {
        $result = self::is_utf32($str);

        return $result;
    }
3095
3096
    /**
     * Deprecated alias of "UTF8::is_utf8()".
     *
     * @param string $str    <p>Forwarded unchanged to "UTF8::is_utf8()".</p>
     * @param bool   $strict <p>Forwarded unchanged to "UTF8::is_utf8()".</p>
     *
     * @return bool
     *
     * @see UTF8::is_utf8()
     * @deprecated <p>please use "UTF8::is_utf8()"</p>
     */
    public static function isUtf8($str, $strict = false): bool
    {
        $is_utf8 = self::is_utf8($str, $strict);

        return $is_utf8;
    }
3111
3112
    /**
     * Tells whether the string consists solely of alphabetic chars
     * (the "[[:alpha:]]" character class); an empty string also matches.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not $str contains only alphabetic chars.</p>
     */
    public static function is_alpha(string $str): bool
    {
        // without mbstring support the generic pattern matcher is used instead
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, '^[[:alpha:]]*$');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('^[[:alpha:]]*$', $str);
    }
3129
3130
    /**
     * Tells whether the string consists solely of alphabetic and numeric chars
     * (the "[[:alnum:]]" character class); an empty string also matches.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not $str contains only alphanumeric chars.</p>
     */
    public static function is_alphanumeric(string $str): bool
    {
        // without mbstring support the generic pattern matcher is used instead
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, '^[[:alnum:]]*$');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('^[[:alnum:]]*$', $str);
    }
3147
3148
    /**
     * Checks if a string is 7 bit ASCII (delegates to "ASCII::is_ascii()").
     *
     * @param string $str <p>The string to check.</p>
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if it is ASCII<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function is_ascii(string $str): bool
    {
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
3163
3164
    /**
     * Tells whether the input is a base64 encoded string.
     *
     * The input counts as base64 when it strictly decodes and re-encoding the
     * decoded bytes yields exactly the same string again.
     *
     * @param mixed|string $str                   <p>The input string; non-strings are never valid.</p>
     * @param bool         $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p>
     *
     * @return bool whether or not $str is base64 encoded
     */
    public static function is_base64($str, $empty_string_is_valid = false): bool
    {
        if ($str === '' && $empty_string_is_valid === false) {
            return false;
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (!\is_string($str)) {
            return false;
        }

        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // round-trip check: rejects input that decodes but is not in canonical form
        return \base64_encode($decoded) === $str;
    }
3193
3194
    /**
     * Heuristically checks whether the input looks like binary data.
     *
     * The checks run in this order and the first hit wins:
     * a string made only of "0" / "1" chars, a "binary" type reported by
     * "UTF8::get_file_type()", more than 25% NUL bytes and - in strict mode
     * only - a "binary" MIME encoding reported by ext-fileinfo.
     *
     * @param mixed $input  <p>The input; it is cast to string first. An empty string is never binary.</p>
     * @param bool  $strict [optional] <p>Also ask ext-fileinfo for the MIME encoding.</p>
     *
     * @throws \RuntimeException if $strict is true and the "finfo" support flag is false
     *
     * @return bool
     */
    public static function is_binary($input, bool $strict = false): bool
    {
        $input = (string) $input;
        if ($input === '') {
            return false;
        }

        // textual bit-string, e.g. "010110"
        if (\preg_match('~^[01]+$~', $input)) {
            return true;
        }

        $file_type = self::get_file_type($input);
        if ($file_type['type'] === 'binary') {
            return true;
        }

        // a high ratio of NUL bytes is taken as a sign of binary data
        $byte_length = \strlen($input);
        $null_byte_count = \substr_count($input, "\x0", 0, $byte_length);
        if (($null_byte_count / $byte_length) > 0.25) {
            return true;
        }

        if ($strict !== true) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $mime_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

        return $mime_encoding === 'binary';
    }
3238
3239
    /**
     * Checks whether a file looks binary, based on its first 512 bytes.
     *
     * The leading block is read and handed to "UTF8::is_binary()" in strict mode.
     * A file that cannot be opened, or that yields no data, is reported as not binary.
     *
     * @param string $file <p>Path handed to fopen().</p>
     *
     * @return bool
     */
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (!\is_resource($handle)) {
            return false;
        }

        $head = \fread($handle, 512);
        \fclose($handle);

        if ($head === '') {
            return false;
        }

        return self::is_binary($head, true);
    }
3263
3264
    /**
     * Tells whether the string consists solely of whitespace chars
     * (the "[[:space:]]" character class); an empty string also matches.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not $str contains only whitespace characters.</p>
     */
    public static function is_blank(string $str): bool
    {
        // without mbstring support the generic pattern matcher is used instead
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, '^[[:space:]]*$');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('^[[:space:]]*$', $str);
    }
3281
3282
    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * The input is compared strictly (===) against the keys of the internal
     * BOM table, so it must be exactly a BOM and nothing else.
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
     */
    public static function is_bom($str): bool
    {
        // Only the keys of the BOM table are relevant here. Looking them up by
        // value avoids iterating the static table by reference, which left a
        // dangling reference behind and needlessly turned entries into references.
        return \in_array($str, \array_keys(self::$BOM), true);
    }
3303
3304
    /**
     * Determine whether the value is considered to be empty.
     *
     * Follows the rules of PHP's empty(): every value that converts to
     * boolean FALSE (e.g. "", "0", 0, null, false, []) counts as empty.
     *
     * @param mixed $str
     *
     * @return bool whether or not $str is empty()
     */
    public static function is_empty($str): bool
    {
        // the parameter always exists, so empty($str) is the same as a negated boolean cast
        return !$str;
    }
3318
3319
    /**
     * Tells whether the string consists solely of hexadecimal chars
     * (the "[[:xdigit:]]" character class); an empty string also matches.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not $str contains only hexadecimal chars.</p>
     */
    public static function is_hexadecimal(string $str): bool
    {
        // without mbstring support the generic pattern matcher is used instead
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, '^[[:xdigit:]]*$');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('^[[:xdigit:]]*$', $str);
    }
3336
3337
    /**
     * Check if the string contains any HTML tags.
     *
     * The string is passed through "UTF8::emoji_encode()" first and then
     * searched for the first opening, closing or self-closing tag.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not $str contains html elements.</p>
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // hack for emoji support :/
        $encoded = self::emoji_encode($str);

        $found_tags = [];
        \preg_match("/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u", $encoded, $found_tags);

        // the match array stays empty when nothing was found (or the regex failed)
        return $found_tags !== [];
    }
3360
3361
    /**
     * Try to check if "$str" is a JSON-string.
     *
     * The string is decoded with "UTF8::json_decode()"; it counts as JSON when
     * decoding left no JSON error behind and - by default - produced an array
     * or an object.
     *
     * @param string $str                                    <p>The input string.</p>
     * @param bool   $only_array_or_object_results_are_valid [optional] <p>Only array and objects are valid json results.</p>
     *
     * @throws \RuntimeException if the "json" support flag is false
     *
     * @return bool
     *              <p>Whether or not the $str is in JSON format.</p>
     */
    public static function is_json(
        string $str,
        $only_array_or_object_results_are_valid = true
    ): bool {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // a NULL result is only acceptable for the literal "null" (in any letter case)
        if ($decoded === null && \strtoupper($str) !== 'NULL') {
            return false;
        }

        if ($only_array_or_object_results_are_valid === true) {
            $is_container = \is_object($decoded) || \is_array($decoded);
            if (!$is_container) {
                return false;
            }
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3400
3401
    /**
     * Tells whether the string consists solely of lower case chars
     * (the "[[:lower:]]" character class); an empty string also matches.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not $str contains only lowercase chars.</p>
     */
    public static function is_lowercase(string $str): bool
    {
        // without mbstring support the generic pattern matcher is used instead
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, '^[[:lower:]]*$');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('^[[:lower:]]*$', $str);
    }
3416
3417
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The string is test-wise unserialized. Objects are never instantiated
     * during that probe ("allowed_classes" is disabled), so untrusted input
     * cannot trigger magic methods such as __wakeup() or __destruct().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not $str is serialized.</p>
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // a serialized "false" cannot be told apart from an unserialize() failure,
        // so it is recognized up front
        if ($str === 'b:0;') {
            return true;
        }

        // unserialize() reports invalid input via a notice / warning, which is
        // an expected outcome of this probe and therefore silenced.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3437
3438
    /**
     * Tells whether the string consists solely of upper case chars
     * (the "[[:upper:]]" character class); an empty string also matches.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not $str contains only upper case characters.</p>
     */
    public static function is_uppercase(string $str): bool
    {
        // without mbstring support the generic pattern matcher is used instead
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, '^[[:upper:]]*$');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('^[[:upper:]]*$', $str);
    }
3456
3457
    /**
     * Check if the string is UTF-16.
     *
     * For each byte order the input is converted to UTF-8 and back again. When
     * that round trip is stable, the characters of the converted text that also
     * occur in the char list of the raw input are counted. The byte order with
     * the higher count wins; equal counts mean "not UTF-16".
     *
     * @param mixed $str                       <p>The input string.</p>
     * @param bool  $check_if_string_is_binary [optional] <p>When true, input that "UTF8::is_binary()"
     *                                         (strict mode) does not classify as binary is rejected
     *                                         right away.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;

        if (
            $check_if_string_is_binary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // char list of the raw input, computed lazily and shared by both byte orders
        $str_chars = null;

        $maybe = ['UTF-16LE' => 0, 'UTF-16BE' => 0];
        foreach (['UTF-16LE', 'UTF-16BE'] as $encoding) {
            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            // only a loss-free round trip counts as a candidate
            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            if ($str_chars === null) {
                $str_chars = self::count_chars($str, true, false);
            }

            // only the keys (the characters) are needed, so iterate by value;
            // a by-reference foreach over a function result has no valid target
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true) === true) {
                    ++$maybe[$encoding];
                }
            }
        }

        if ($maybe['UTF-16BE'] === $maybe['UTF-16LE']) {
            return false;
        }

        return $maybe['UTF-16LE'] > $maybe['UTF-16BE'] ? 1 : 2;
    }
3537
3538
    /**
     * Check if the string is UTF-32.
     *
     * For each byte order the input is converted to UTF-8 and back again. When
     * that round trip is stable, the characters of the converted text that also
     * occur in the char list of the raw input are counted. The byte order with
     * the higher count wins; equal counts mean "not UTF-32".
     *
     * @param mixed $str                       <p>The input string.</p>
     * @param bool  $check_if_string_is_binary [optional] <p>When true, input that "UTF8::is_binary()"
     *                                         (strict mode) does not classify as binary is rejected
     *                                         right away.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;

        if (
            $check_if_string_is_binary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // char list of the raw input, computed lazily and shared by both byte orders
        $str_chars = null;

        $maybe = ['UTF-32LE' => 0, 'UTF-32BE' => 0];
        foreach (['UTF-32LE', 'UTF-32BE'] as $encoding) {
            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            // only a loss-free round trip counts as a candidate
            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                continue;
            }

            if ($str_chars === null) {
                $str_chars = self::count_chars($str, true, false);
            }

            // only the keys (the characters) are needed, so iterate by value;
            // a by-reference foreach over a function result has no valid target
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true) === true) {
                    ++$maybe[$encoding];
                }
            }
        }

        if ($maybe['UTF-32BE'] === $maybe['UTF-32LE']) {
            return false;
        }

        return $maybe['UTF-32LE'] > $maybe['UTF-32BE'] ? 1 : 2;
    }
3618
3619
    /**
3620
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
3621
     *
3622
     * @param int|string|string[]|null $str    <p>The input to be checked.</p>
3623
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
3624
     *
3625
     * @return bool
3626
     */
3627 82
    public static function is_utf8($str, bool $strict = false): bool
    {
        // An array is accepted only when every element passes the same check;
        // nested arrays are handled by the recursive call.
        if (\is_array($str) === true) {
            // Iterate by value: nothing is modified here, and a by-reference
            // loop variable would stay bound to the last element afterwards.
            foreach ($str as $value) {
                if (self::is_utf8($value, $strict) === false) {
                    return false;
                }
            }

            return true;
        }

        // Everything else is cast to string and handed to the string checker.
        return self::is_utf8_string((string) $str, $strict);
    }
3641
3642
    /**
3643
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3644
     * Decodes a JSON string
3645
     *
3646
     * @see http://php.net/manual/en/function.json-decode.php
3647
     *
3648
     * @param string $json    <p>
3649
     *                        The <i>json</i> string being decoded.
3650
     *                        </p>
3651
     *                        <p>
3652
     *                        This function only works with UTF-8 encoded strings.
3653
     *                        </p>
3654
     *                        <p>PHP implements a superset of
3655
     *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3656
     *                        only supports these values when they are nested inside an array or an object.
3657
     *                        </p>
3658
     * @param bool   $assoc   [optional] <p>
3659
     *                        When <b>TRUE</b>, returned objects will be converted into
3660
     *                        associative arrays.
3661
     *                        </p>
3662
     * @param int    $depth   [optional] <p>
3663
     *                        User specified recursion depth.
3664
     *                        </p>
3665
     * @param int    $options [optional] <p>
3666
     *                        Bitmask of JSON decode options. Currently only
3667
     *                        <b>JSON_BIGINT_AS_STRING</b>
3668
     *                        is supported (default is to cast large integers as floats)
3669
     *                        </p>
3670
     *
3671
     * @return mixed
3672
     *               The value encoded in <i>json</i> in appropriate PHP type. Values true, false and
3673
     *               null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b> and <b>NULL</b> respectively.
3674
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
3675
     *               is deeper than the recursion limit.
3676
     */
3677 43
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        // The raw input goes through self::filter() before it is decoded.
        $filtered_json = self::filter($json);

        // Decoding is delegated to ext-json, so fail loudly when it is missing.
        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $decoded = \json_decode($filtered_json, $assoc, $depth, $options);

        return $decoded;
    }
3692
3693
    /**
3694
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3695
     * Returns the JSON representation of a value.
3696
     *
3697
     * @see http://php.net/manual/en/function.json-encode.php
3698
     *
3699
     * @param mixed $value   <p>
3700
     *                       The <i>value</i> being encoded. Can be any type except
3701
     *                       a resource.
3702
     *                       </p>
3703
     *                       <p>
3704
     *                       All string data must be UTF-8 encoded.
3705
     *                       </p>
3706
     *                       <p>PHP implements a superset of
3707
     *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3708
     *                       only supports these values when they are nested inside an array or an object.
3709
     *                       </p>
3710
     * @param int   $options [optional] <p>
3711
     *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
3712
     *                       <b>JSON_HEX_TAG</b>,
3713
     *                       <b>JSON_HEX_AMP</b>,
3714
     *                       <b>JSON_HEX_APOS</b>,
3715
     *                       <b>JSON_NUMERIC_CHECK</b>,
3716
     *                       <b>JSON_PRETTY_PRINT</b>,
3717
     *                       <b>JSON_UNESCAPED_SLASHES</b>,
3718
     *                       <b>JSON_FORCE_OBJECT</b>,
3719
     *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
3720
     *                       constants is described on
3721
     *                       the JSON constants page.
3722
     *                       </p>
3723
     * @param int   $depth   [optional] <p>
3724
     *                       Set the maximum depth. Must be greater than zero.
3725
     *                       </p>
3726
     *
3727
     * @return false|string
3728
     *                      A JSON encoded <strong>string</strong> on success or<br>
3729
     *                      <strong>FALSE</strong> on failure
3730
     */
3731 5
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        // The value goes through self::filter() before it is encoded.
        $filtered_value = self::filter($value);

        // Encoding is delegated to ext-json, so fail loudly when it is missing.
        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $encoded = \json_encode($filtered_value, $options, $depth);

        return $encoded;
    }
3742
3743
    /**
3744
     * Checks whether JSON is available on the server.
3745
     *
3746
     * @return bool
3747
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
3748
     */
3749
    public static function json_loaded(): bool
    {
        // JSON support is detected by the presence of the decoder function.
        $has_json_decode = \function_exists('json_decode');

        return $has_json_decode;
    }
3753
3754
    /**
3755
     * Makes string's first char lowercase.
3756
     *
3757
     * @param string      $str                           <p>The input string</p>
3758
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
3759
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
3760
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
3761
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
3762
     *
3763
     * @return string the resulting string
3764
     */
3765 46
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        // Language-specific casing or length preservation needs self::strtolower();
        // without either option the plain mbstring function is enough.
        $needs_special_casing = ($lang !== null || $try_to_keep_the_string_length !== false);

        if ($encoding === 'UTF-8') {
            // Split into "everything after the first char" and "the first char".
            $tail = (string) \mb_substr($str, 1);
            $head = (string) \mb_substr($str, 0, 1);

            if ($needs_special_casing === false) {
                return \mb_strtolower($head) . $tail;
            }

            $lowered_head = self::strtolower(
                $head,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );

            return $lowered_head . $tail;
        }

        // Any other charset: normalize its name and use the encoding-aware helpers.
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $tail = (string) self::substr($str, 1, null, $encoding);
        $head = (string) self::substr($str, 0, 1, $encoding);

        $lowered_head = self::strtolower(
            $head,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        );

        return $lowered_head . $tail;
    }
3810
3811
    /**
3812
     * alias for "UTF8::lcfirst()"
3813
     *
3814
     * @param string      $str
3815
     * @param string      $encoding
3816
     * @param bool        $clean_utf8
3817
     * @param string|null $lang
3818
     * @param bool        $try_to_keep_the_string_length
3819
     *
3820
     * @return string
3821
     *
3822
     * @see UTF8::lcfirst()
3823
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
3824
     */
3825 2
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Deprecated alias: every argument is forwarded unchanged to lcfirst().
        $result = self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
3840
3841
    /**
3842
     * Lowercase for all words in the string.
3843
     *
3844
     * @param string      $str                           <p>The input string.</p>
3845
     * @param string[]    $exceptions                    [optional] <p>Exclusion for some words.</p>
3846
     * @param string      $char_list                     [optional] <p>Additional chars that contains to words and do not start
3847
     *                                                   a new word.</p>
3848
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
3849
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
3850
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
3851
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
3852
     *
3853
     * @return string
3854
     */
3855 2
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Compare against '' explicitly: a loose "!$str" check also rejects
        // the perfectly valid input "0".
        if ($str === '') {
            return '';
        }

        // NOTE(review): the pieces returned by str_to_words() are concatenated
        // back together below, so they presumably include the separators as
        // well as the words - confirm against str_to_words().
        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        // Iterate by value: the pieces are only read, and a by-reference loop
        // variable would stay bound to the last element after the loop.
        foreach ($words as $word) {
            // Skip only truly empty pieces; a loose "!$word" check would also
            // drop a "0" piece from the result.
            if ((string) $word === '') {
                continue;
            }

            if (
                $use_exceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words_str .= self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
            } else {
                // Words listed in $exceptions are copied through untouched.
                $words_str .= $word;
            }
        }

        return $words_str;
    }
3890
3891
    /**
3892
     * alias for "UTF8::lcfirst()"
3893
     *
3894
     * @param string      $str
3895
     * @param string      $encoding
3896
     * @param bool        $clean_utf8
3897
     * @param string|null $lang
3898
     * @param bool        $try_to_keep_the_string_length
3899
     *
3900
     * @return string
3901
     *
3902
     * @see UTF8::lcfirst()
3903
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
3904
     */
3905 5
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Deprecated alias: every argument is forwarded unchanged to lcfirst().
        $result = self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
3920
3921
    /**
3922
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
3923
     *
3924
     * @param string      $str   <p>The string to be trimmed</p>
3925
     * @param string|null $chars <p>Optional characters to be stripped</p>
3926
     *
3927
     * @return string the string with unwanted characters stripped from the left
3928
     */
3929 22
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // null and '' both mean "strip whitespace". The check is explicit because
        // a loose "if ($chars)" would also treat the character list "0" as absent.
        $has_chars = $chars !== null && $chars !== '';

        if (self::$SUPPORT['mbstring'] === true) {
            if ($has_chars === true) {
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2.
                $pattern = "^[{$chars}]+";
            } else {
                $pattern = '^[\\s]+';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // Fallback without mbstring: same pattern, but quoted for the "/"
        // delimiter that is passed to regex_replace().
        if ($has_chars === true) {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+";
        } else {
            $pattern = '^[\\s]+';
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
3957
3958
    /**
3959
     * Returns the UTF-8 character with the maximum code point in the given data.
3960
     *
3961
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
3962
     *
3963
     * @return string|null the character with the highest code point than others, returns null on failure or empty input
3964
     */
3965 2
    public static function max($arg)
    {
        // An array of strings is glued into a single string first.
        $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $code_points = self::codepoints($input, false);

        // Without any code point there is no maximum to report.
        if ($code_points === []) {
            return null;
        }

        // Convert the highest code point back into its character.
        return self::chr(\max($code_points));
    }
3980
3981
    /**
3982
     * Calculates and returns the maximum number of bytes taken by any
3983
     * UTF-8 encoded character in the given string.
3984
     *
3985
     * @param string $str <p>The original Unicode string.</p>
3986
     *
3987
     * @return int
3988
     *             <p>Max byte lengths of the given chars.</p>
3989
     */
3990 2
    public static function max_chr_width(string $str): int
    {
        $byte_sizes = self::chr_size_list($str);

        // \max() is only called on a non-empty list; an empty list yields 0.
        if ($byte_sizes === []) {
            return 0;
        }

        return (int) \max($byte_sizes);
    }
3999
4000
    /**
4001
     * Checks whether mbstring is available on the server.
4002
     *
4003
     * @return bool
4004
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
4005
     */
4006 26
    public static function mbstring_loaded(): bool
    {
        // Ask the engine directly whether the mbstring extension is loaded.
        $is_loaded = \extension_loaded('mbstring');

        return $is_loaded;
    }
4010
4011
    /**
4012
     * Returns the UTF-8 character with the minimum code point in the given data.
4013
     *
4014
     * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
4015
     *
4016
     * @return string|null the character with the lowest code point; null on failure or empty input
4017
     */
4018 2
    public static function min($arg)
    {
        // An array of strings is glued into a single string first.
        $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $code_points = self::codepoints($input, false);

        // Without any code point there is no minimum to report.
        if ($code_points === []) {
            return null;
        }

        // Convert the lowest code point back into its character.
        return self::chr(\min($code_points));
    }
4033
4034
    /**
4035
     * alias for "UTF8::normalize_encoding()"
4036
     *
4037
     * @param mixed $encoding
4038
     * @param mixed $fallback
4039
     *
4040
     * @return mixed
4041
     *
4042
     * @see UTF8::normalize_encoding()
4043
     * @deprecated <p>please use "UTF8::normalize_encoding()"</p>
4044
     */
4045 2
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        // Deprecated alias: both arguments are forwarded to normalize_encoding().
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4049
4050
    /**
4051
     * Normalize the encoding-"name" input.
4052
     *
4053
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
4054
     * @param mixed $fallback <p>e.g.: UTF-8</p>
4055
     *
4056
     * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return an empty string as fallback (by default)
4057
     */
4058 331
    public static function normalize_encoding($encoding, $fallback = '')
    {
        // Memo of names that already went through the lookup below.
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        $encoding = (string) $encoding;

        // Both '' and '0' are falsy and yield the fallback.
        if (!$encoding) {
            return $fallback;
        }

        // Exact-match shortcuts, checked before the cache and the encodings list.
        if (\in_array($encoding, ['UTF-8', 'UTF8'], true)) {
            return 'UTF-8';
        }

        if (\in_array($encoding, ['8BIT', 'BINARY'], true)) {
            return 'CP850';
        }

        if (\in_array($encoding, ['HTML', 'HTML-ENTITIES'], true)) {
            return 'HTML-ENTITIES';
        }

        if (\in_array($encoding, ['ISO', 'ISO-8859-1'], true)) {
            return 'ISO-8859-1';
        }

        // only a fallback, for non "strict_types" usage ...
        if (\in_array($encoding, ['1', '0'], true)) {
            return $fallback;
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        // The list of known encoding names is loaded lazily on first use.
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // A name that is already in the known list is returned as-is.
        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        // Build the lookup key: upper-case, with everything but [a-zA-Z0-9] removed.
        $upper_encoding = \strtoupper($encoding);
        $lookup_key = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $upper_encoding);

        $equivalences = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // Unknown names fall through as their upper-cased spelling.
        $normalized = !empty($equivalences[$lookup_key])
            ? $equivalences[$lookup_key]
            : $upper_encoding;

        // Remember the result under the spelling the caller actually used.
        $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $normalized;

        return $normalized;
    }
4201
4202
    /**
4203
     * Standardize line ending to unix-like.
4204
     *
4205
     * @param string $str      <p>The input string.</p>
4206
     * @param string $replacer <p>The replacer char e.g. "\n" (Linux) or "\r\n" (Windows). You can also use \PHP_EOL here.</p>
4207
     *
4208
     * @return string
4209
     *                <p>A string with normalized line ending.</p>
4210
     */
4211 5
    public static function normalize_line_ending(string $str, $replacer = "\n"): string
    {
        // An array replacer keeps the previous pairwise str_replace() mapping.
        if (\is_array($replacer) === true) {
            return \str_replace(["\r\n", "\r", "\n"], $replacer, $str);
        }

        // Step 1: collapse every line-ending flavour to a single "\n".
        // "\r\n" comes first so it is never split into two line endings.
        $str = \str_replace(["\r\n", "\r"], "\n", $str);

        if ($replacer === "\n") {
            return $str;
        }

        // Step 2: swap in the requested replacer in one pass. Doing all three
        // replacements in one sequential str_replace() call would re-replace
        // the "\r" / "\n" of a replacer such as "\r\n" that was just inserted.
        return \str_replace("\n", $replacer, $str);
    }
4215
4216
    /**
4217
     * Normalize some MS Word special characters.
4218
     *
4219
     * @param string $str <p>The string to be normalized.</p>
4220
     *
4221
     * @return string
4222
     *                <p>A string with normalized characters for commonly used chars in Word documents.</p>
4223
     */
4224 10
    public static function normalize_msword(string $str): string
    {
        // Thin wrapper: the actual work is done by the ASCII helper class.
        $normalized = ASCII::normalize_msword($str);

        return $normalized;
    }
4228
4229
    /**
4230
     * Normalize the whitespace.
4231
     *
4232
     * @param string $str                        <p>The string to be normalized.</p>
4233
     * @param bool   $keep_non_breaking_space    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
4234
     * @param bool   $keep_bidi_unicode_controls [optional] <p>Set to true, to keep non-printable (for the web)
4235
     *                                           bidirectional text chars.</p>
4236
     *
4237
     * @return string
4238
     *                <p>A string with normalized whitespace.</p>
4239
     */
4240 61
    public static function normalize_whitespace(
        string $str,
        bool $keep_non_breaking_space = false,
        bool $keep_bidi_unicode_controls = false
    ): string {
        // Thin wrapper: all three arguments are forwarded to the ASCII helper class.
        $normalized = ASCII::normalize_whitespace($str, $keep_non_breaking_space, $keep_bidi_unicode_controls);

        return $normalized;
    }
4251
4252
    /**
4253
     * Calculates Unicode code point of the given UTF-8 encoded character.
4254
     *
4255
     * INFO: opposite to UTF8::chr()
4256
     *
4257
     * @param string $chr      <p>The character whose code point is calculated.</p>
4258
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
4259
     *
4260
     * @return int
4261
     *             <p>Unicode code point of the given character,<br>
4262
     *             0 on invalid UTF-8 byte sequence</p>
4263
     */
4264 26
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        // Memo of already computed code points, keyed by character + encoding.
        static $CHAR_CACHE = [];

        $chr = (string) $chr;

        // 'UTF-8' and 'CP850' are used as given; any other name is normalized.
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $cache_key = $chr . $encoding;
        if (isset($CHAR_CACHE[$cache_key]) === true) {
            return $CHAR_CACHE[$cache_key];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        // The character => code point table is loaded lazily on first use.
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            return $CHAR_CACHE[$cache_key] = self::$ORD[$chr];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_code = \IntlChar::ord($chr);
            // A falsy result (null or 0) falls through to the manual decoding below.
            if ($intl_code) {
                return $CHAR_CACHE[$cache_key] = $intl_code;
            }
        }

        //
        // fallback via vanilla php
        //

        // Unpack at most four bytes into a 1-based list of unsigned byte values.
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        /** @noinspection OffsetOperationsInspection */
        $lead_byte = $bytes ? $bytes[1] : 0;

        /** @noinspection OffsetOperationsInspection */
        if ($lead_byte >= 0xF0 && isset($bytes[4])) {
            // four-byte sequence
            /** @noinspection UnnecessaryCastingInspection */
            $code = (int) ((($lead_byte - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        } elseif ($lead_byte >= 0xE0 && isset($bytes[3])) {
            // three-byte sequence
            /** @noinspection UnnecessaryCastingInspection */
            $code = (int) ((($lead_byte - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        } elseif ($lead_byte >= 0xC0 && isset($bytes[2])) {
            // two-byte sequence
            /** @noinspection UnnecessaryCastingInspection */
            $code = (int) ((($lead_byte - 0xC0) << 6) + $bytes[2] - 0x80);
        } else {
            // single byte, or nothing to decode (0)
            $code = $lead_byte;
        }

        $CHAR_CACHE[$cache_key] = $code;

        return $code;
    }
4337
4338
    /**
4339
     * Parses the string into an array (into the second parameter).
4340
     *
4341
     * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
4342
     *          if the second parameter is not set!
4343
     *
4344
     * @see http://php.net/manual/en/function.parse-str.php
4345
     *
4346
     * @param string $str        <p>The input string.</p>
4347
     * @param array  $result     <p>The result will be returned into this reference parameter.</p>
4348
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
4349
     *
4350
     * @return bool
4351
     *              <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
4352
     */
4353 2
    public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
    {
        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        // Without mbstring the native parser fills $result; success then simply
        // means that something was parsed.
        if (self::$SUPPORT['mbstring'] !== true) {
            /** @noinspection PhpVoidFunctionResultUsedInspection */
            \parse_str($str, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($str, $result);
        if ($parsed === false) {
            return false;
        }

        // A successful call that produced no entries still counts as a failure.
        return $result !== [];
    }
4370
4371
    /**
4372
     * Checks if the "/u" modifier, which enables Unicode support in PCRE, is available.
4373
     *
4374
     * @return bool
4375
     *              <p>
4376
     *              <strong>true</strong> if support is available,<br>
4377
     *              <strong>false</strong> otherwise
4378
     *              </p>
4379
     */
4380 102
    public static function pcre_utf8_support(): bool
    {
        // Probe with an empty pattern using the "u" modifier; the result is cast
        // to bool and any warning is silenced.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $match_result = @\preg_match('//u', '');

        return (bool) $match_result;
    }
4385
4386
    /**
4387
     * Create an array containing a range of UTF-8 characters.
4388
     *
4389
     * @param mixed     $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
4390
     * @param mixed     $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
4391
     * @param bool      $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple "is_numeric"</p>
4392
     * @param string    $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
4393
     * @param float|int $step      [optional] <p>
4394
     *                             If a step value is given, it will be used as the
4395
     *                             increment between elements in the sequence. step
4396
     *                             should be given as a positive number. If not specified,
4397
     *                             step will default to 1.
4398
     *                             </p>
4399
     *
4400
     * @return string[]
4401
     */
4402 2
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        // Falsy bounds (null, '', 0, '0', ...) produce an empty range.
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            /**
             * @psalm-suppress RedundantConditionGivenDocblockType
             */
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            /**
             * @psalm-suppress RedundantConditionGivenDocblockType - false-positive from psalm?
             */
            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        $is_digit = false;
        $is_xdigit = false;

        // The ctype_*() functions treat small integers as ASCII codes and
        // deprecate non-string arguments since PHP 8.1, so both the digit and
        // the hex-digit check always look at the string form of the bounds.
        /** @noinspection PhpComposerExtensionStubsInspection */
        if ($use_ctype && \ctype_digit((string) $var1) && \ctype_digit((string) $var2)) {
            $is_digit = true;
            $start = (int) $var1;
        } elseif ($use_ctype && \ctype_xdigit((string) $var1) && \ctype_xdigit((string) $var2)) {
            $is_xdigit = true;
            $start = (int) self::hex_to_int((string) $var1);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            // anything else is treated as a character and mapped via self::ord()
            $start = self::ord($var1);
        }

        if (!$start) {
            return [];
        }

        // The end bound is interpreted the same way the start bound was.
        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int((string) $var2);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord($var2);
        }

        if (!$end) {
            return [];
        }

        // Map every code point of the numeric range back to a character.
        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
4474
4475
    /**
4476
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
4477
     *
4478
     * e.g:
4479
     * 'test+test'                     => 'test+test'
4480
     * 'D&#252;sseldorf'               => 'Düsseldorf'
4481
     * 'D%FCsseldorf'                  => 'Düsseldorf'
4482
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
4483
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
4484
     * 'Düsseldorf'                   => 'Düsseldorf'
4485
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
4486
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
4487
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
4488
     *
4489
     * @param string $str          <p>The input string.</p>
4490
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
4491
     *
4492
     * @return string
4493
     *                <p>The decoded URL, as a string.</p>
4494
     */
4495 6
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // Fast path: without "&", "%", "+" or "\u" only fix_simple_utf8() is applied.
        $has_marker = \strpos($str, '&') !== false
                      ||
                      \strpos($str, '%') !== false
                      ||
                      \strpos($str, '+') !== false
                      ||
                      \strpos($str, '\u') !== false;
        if (!$has_marker) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // One decoding pass always runs. With $multi_decode the pass is repeated
        // until the output no longer changes.
        do {
            $before_pass = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $entities_decoded = self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            );
            $str = self::fix_simple_utf8(\rawurldecode($entities_decoded));
        } while ($multi_decode && $before_pass !== $str);

        return $str;
    }
4547
4548
    /**
4549
     * Replaces all occurrences of $pattern in $str by $replacement.
4550
     *
4551
     * @param string $str         <p>The input string.</p>
4552
     * @param string $pattern     <p>The regular expression pattern.</p>
4553
     * @param string $replacement <p>The string to replace with.</p>
4554
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
4555
     * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
4556
     *
4557
     * @return string
4558
     */
4559 18
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // The exact option string "msr" is reduced to "ms"; everything else is used as given.
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback: an empty (falsy) delimiter becomes "/"
        $wrap = $delimiter ?: '/';

        // The "u" modifier is always added, so the pattern is matched as UTF-8.
        $regex = $wrap . $pattern . $wrap . 'u' . $modifiers;
        $replaced = \preg_replace($regex, $replacement, $str);

        // preg_replace() yields null on error, which the cast turns into ''.
        return (string) $replaced;
    }
4581
4582
    /**
4583
     * alias for "UTF8::remove_bom()"
4584
     *
4585
     * @param string $str
4586
     *
4587
     * @return string
4588
     *
4589
     * @see UTF8::remove_bom()
4590
     * @deprecated <p>please use "UTF8::remove_bom()"</p>
4591
     */
4592
    public static function removeBOM(string $str): string
    {
        // Deprecated alias: the input is forwarded unchanged to remove_bom().
        $without_bom = self::remove_bom($str);

        return $without_bom;
    }
4596
4597
    /**
4598
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
4599
     *
4600
     * @param string $str <p>The input string.</p>
4601
     *
4602
     * @return string
4603
     *                <p>A string without UTF-BOM.</p>
4604
     */
4605 55
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remaining_length = \strlen($str);
        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            // Only a BOM at offset 0 is of interest.
            if (\strpos($str, $bom_string, 0) !== 0) {
                continue;
            }

            /** @var false|string $stripped - needed for PhpStan (stubs error) */
            $stripped = \substr($str, $bom_byte_length, $remaining_length);
            if ($stripped === false) {
                return '';
            }

            // The loop goes on, so the remainder is still checked against the
            // BOM entries that follow.
            $remaining_length -= (int) $bom_byte_length;
            $str = (string) $stripped;
        }

        return $str;
    }
4628
4629
    /**
4630
     * Removes duplicate occurrences of a string in another string.
4631
     *
4632
     * @param string          $str  <p>The base string.</p>
4633
     * @param string|string[] $what <p>String to search for in the base string.</p>
4634
     *
4635
     * @return string
4636
     *                <p>A string with removed duplicates.</p>
4637
     */
4638 2
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        // A single needle is handled like a one-element list.
        if (\is_string($what) === true) {
            $what = [$what];
        }

        // Anything that is neither a string nor an array leaves the input untouched.
        if (\is_array($what) === false) {
            return $str;
        }

        foreach ($what as $item) {
            // Replace each run with capture group 1 instead of the raw $item:
            // preg_replace() expands "$0", "\0", "${1}" ... inside a replacement
            // string, so an item such as "$0" would otherwise re-insert the whole
            // run. Group 1 holds the last repetition, i.e. exactly one literal item.
            $str = (string) \preg_replace('/(' . \preg_quote($item, '/') . ')+/u', '$1', $str);
        }

        return $str;
    }
4655
4656
    /**
4657
     * Remove html via "strip_tags()" from the string.
4658
     *
4659
     * @param string $str            <p>The input string.</p>
4660
     * @param string $allowable_tags [optional] <p>You can use the optional second parameter to specify tags which should
4661
     *                               not be stripped. Default: '' (all tags are stripped)
4662
     *                               </p>
4663
     *
4664
     * @return string
4665
     *                <p>A string without html tags.</p>
4666
     */
4667 6
    public static function remove_html(string $str, string $allowable_tags = ''): string
    {
        // Thin wrapper: strip_tags() does all the work; $allowable_tags is passed through as-is.
        $plain_text = \strip_tags($str, $allowable_tags);

        return $plain_text;
    }
4671
4672
    /**
4673
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
4674
     *
4675
     * @param string $str         <p>The input string.</p>
4676
     * @param string $replacement [optional] <p>Default is an empty string.</p>
4677
     *
4678
     * @return string
4679
     *                <p>A string without breaks.</p>
4680
     */
4681 6
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // Alternatives, in order: CRLF, CR, LF, any "<br ...>" tag.
        // - "\r\n" is listed first so a Windows line break is replaced ONCE,
        //   not once per byte. The former pattern started with a stray "/",
        //   so CRLF only matched as a unit when preceded by a slash, and that
        //   slash was removed too.
        // - "i": case-insensitive, "s": dot matches newlines, "U": quantifiers
        //   are lazy, so "<br.*>" ends at the first ">".
        return (string) \preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
    }
4685
4686
    /**
4687
     * Remove invisible characters from a string.
4688
     *
4689
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
4690
     *
4691
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
4692
     *
4693
     * @param string $str         <p>The input string.</p>
4694
     * @param bool   $url_encoded [optional] <p>
4695
     *                            Try to remove url encoded control character.
4696
     *                            WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
4697
     *                            <br>
4698
     *                            Default: false
4699
     *                            </p>
4700
     * @param string $replacement [optional] <p>The replacement character.</p>
4701
     *
4702
     * @return string
4703
     *                <p>A string without invisible chars.</p>
4704
     */
4705 89
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = false,
        string $replacement = ''
    ): string {
        // Pure delegation: the ASCII helper of the same name receives all three
        // arguments in the same order.
        return ASCII::remove_invisible_characters($str, $url_encoded, $replacement);
    }
4716
4717
    /**
4718
     * Returns a new string with the prefix $substring removed, if present.
4719
     *
4720
     * @param string $str       <p>The input string.</p>
4721
     * @param string $substring <p>The prefix to remove.</p>
4722
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
4723
     *
4724
     * @return string
4725
     *                <p>A string without the prefix $substring.</p>
4726
     */
4727 12
    public static function remove_left(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to do for an empty prefix or when $str does not start with it.
        // The comparison against '' is explicit on purpose: a truthiness check
        // would treat the prefix "0" as empty and never remove it.
        if ($substring === '' || \strpos($str, $substring) !== 0) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            // cut off as many characters as the prefix has
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
4752
4753
    /**
4754
     * Returns a new string with the suffix $substring removed, if present.
4755
     *
4756
     * @param string $str
4757
     * @param string $substring <p>The suffix to remove.</p>
4758
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
4759
     *
4760
     * @return string
4761
     *                <p>A string having a $str without the suffix $substring.</p>
4762
     */
4763 12
    public static function remove_right(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to do for an empty suffix or when $str does not end with it
        // (byte-wise comparison of the tail). The comparison against '' is
        // explicit on purpose: a truthiness check would treat the suffix "0"
        // as empty and never remove it.
        if ($substring === '' || \substr($str, -\strlen($substring)) !== $substring) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            // keep everything except the last "length of suffix" characters
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
4789
4790
    /**
4791
     * Replaces all occurrences of $search in $str by $replacement.
4792
     *
4793
     * @param string $str            <p>The input string.</p>
4794
     * @param string $search         <p>The needle to search for.</p>
4795
     * @param string $replacement    <p>The string to replace with.</p>
4796
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
4797
     *
4798
     * @return string
4799
     *                <p>A string with replaced parts.</p>
4800
     */
4801 29
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $case_sensitive = true
    ): string {
        // The case-insensitive variant is handled by the class' own str_ireplace().
        if (!$case_sensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        // The case-sensitive variant is a plain byte-wise str_replace().
        return \str_replace($search, $replacement, $str);
    }
4813
4814
    /**
4815
     * Replaces all occurrences of $search in $str by $replacement.
4816
     *
4817
     * @param string       $str            <p>The input string.</p>
4818
     * @param array        $search         <p>The elements to search for.</p>
4819
     * @param array|string $replacement    <p>The string to replace with.</p>
4820
     * @param bool         $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
4821
     *
4822
     * @return string
4823
     *                <p>A string with replaced parts.</p>
4824
     */
4825 30
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $case_sensitive = true
    ): string {
        // The case-insensitive variant is handled by the class' own str_ireplace().
        if (!$case_sensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        // The case-sensitive variant: str_replace() accepts the search list and
        // a string or list of replacements directly.
        return \str_replace($search, $replacement, $str);
    }
4837
4838
    /**
4839
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
4840
     *
4841
     * @param string $str                        <p>The input string</p>
4842
     * @param string $replacement_char           <p>The replacement character.</p>
4843
     * @param bool   $process_invalid_utf8_chars <p>Convert invalid UTF-8 chars </p>
4844
     *
4845
     * @return string
4846
     *                <p>A string without diamond question marks (�).</p>
4847
     */
4848 35
    public static function replace_diamond_question_mark(
        string $str,
        string $replacement_char = '',
        bool $process_invalid_utf8_chars = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($process_invalid_utf8_chars === true) {
            // mb_substitute_character() accepts "none", "long", "entity" or an
            // integer code point - never a raw character (PHP 8 throws a
            // ValueError for that). Invalid sequences are therefore either
            // dropped ("none") or mapped to U+FFFD, which the str_replace()
            // below turns into $replacement_char, whatever its length.
            $substitute = $replacement_char === '' ? 'none' : 0xFFFD;

            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            // Remember the current substitute setting and restore it afterwards.
            $save = \mb_substitute_character();
            \mb_substitute_character($substitute);
            // the polyfill maybe return false, so cast to string
            $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            \mb_substitute_character($save);
        }

        // Swap every U+FFFD (given both as escaped bytes and as a literal) for the replacement.
        return \str_replace(
            [
                "\xEF\xBF\xBD",
                '�',
            ],
            [
                $replacement_char,
                $replacement_char,
            ],
            $str
        );
    }
4888
4889
    /**
4890
     * Strip whitespace or other characters from the end of a UTF-8 string.
4891
     *
4892
     * @param string      $str   <p>The string to be trimmed.</p>
4893
     * @param string|null $chars <p>Optional characters to be stripped.</p>
4894
     *
4895
     * @return string
4896
     *                <p>A string with unwanted characters stripped from the right.</p>
4897
     */
4898 20
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // null or '' means "trim whitespace". The check is explicit because a
        // truthiness test would also discard the char list "0".
        $has_chars = $chars !== null && $chars !== '';

        if (self::$SUPPORT['mbstring'] === true) {
            if ($has_chars) {
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2
                $pattern = "[{$chars}]+$";
            } else {
                $pattern = '[\\s]+$';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // No mbstring: build the same character class for PCRE; regex_replace()
        // wraps it in "/" delimiters, so "/" is quoted as well.
        if ($has_chars) {
            $chars = \preg_quote($chars, '/');
            $pattern = "[{$chars}]+$";
        } else {
            $pattern = '[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
4926
4927
    /**
4928
     * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
4929
     *
4930
     * @psalm-suppress MissingReturnType
4931
     */
4932 2
    public static function showSupport()
    {
        // Collect one "key - value" line per entry of self::$SUPPORT and emit the
        // whole <pre> block at once.
        $lines = '';
        foreach (self::$SUPPORT as $feature => $state) {
            $lines .= $feature . ' - ' . \print_r($state, true) . "\n<br>";
        }

        echo '<pre>' . $lines . '</pre>';
    }
4941
4942
    /**
4943
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
4944
     *
4945
     * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
4946
     * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
4947
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
4948
     *
4949
     * @return string
4950
     *                <p>The HTML numbered entity for the given character.</p>
4951
     */
4952 2
    public static function single_chr_html_encode(
        string $char,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($char === '') {
            return '';
        }

        // With $keep_ascii_chars, input that ASCII::is_ascii() accepts is returned untouched.
        $is_kept_ascii = $keep_ascii_chars === true && ASCII::is_ascii($char) === true;
        if ($is_kept_ascii) {
            return $char;
        }

        // Everything else becomes a numeric entity built from self::ord().
        $code_point = self::ord($char, $encoding);

        return '&#' . $code_point . ';';
    }
4971
4972
    /**
4973
     * @param string $str
4974
     * @param int    $tab_length
4975
     *
4976
     * @return string
4977
     */
4978 5
    public static function spaces_to_tabs(string $str, int $tab_length = 4): string
    {
        // A run of $tab_length spaces stands for one tab; str_repeat() builds that
        // run for every length, including the common 4 and 2.
        $space_run = \str_repeat(' ', $tab_length);

        return \str_replace($space_run, "\t", $str);
    }
4990
4991
    /**
4992
     * alias for "UTF8::str_split()"
4993
     *
4994
     * @param string|string[] $str
4995
     * @param int             $length
4996
     * @param bool            $clean_utf8
4997
     *
4998
     * @return string[]
4999
     *
5000
     * @see UTF8::str_split()
5001
     * @deprecated <p>please use "UTF8::str_split()"</p>
5002
     */
5003 9
    public static function split(
        $str,
        int $length = 1,
        bool $clean_utf8 = false
    ): array {
        // Deprecated alias: all arguments are forwarded unchanged to str_split().
        $chunks = self::str_split($str, $length, $clean_utf8);

        return $chunks;
    }
5010
5011
    /**
5012
     * alias for "UTF8::str_starts_with()"
5013
     *
5014
     * @param string $haystack
5015
     * @param string $needle
5016
     *
5017
     * @return bool
5018
     *
5019
     * @see UTF8::str_starts_with()
5020
     * @deprecated <p>please use "UTF8::str_starts_with()"</p>
5021
     */
5022
    public static function str_begins(string $haystack, string $needle): bool
    {
        // Deprecated alias: both arguments are forwarded unchanged to str_starts_with().
        $starts_with = self::str_starts_with($haystack, $needle);

        return $starts_with;
    }
5026
5027
    /**
5028
     * Returns a camelCase version of the string. Trims surrounding spaces,
5029
     * capitalizes letters following digits, spaces, dashes and underscores,
5030
     * and removes spaces, dashes, as well as underscores.
5031
     *
5032
     * @param string      $str                           <p>The input string.</p>
5033
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
5034
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
5035
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
5036
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
5037
     *
5038
     * @return string
5039
     */
5040 32
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        // 'UTF-8' and 'CP850' are used as given; every other name is normalized first.
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Step 1: trim, lower-case the first character, drop leading dashes / underscores.
        $str = self::lcfirst(
            \trim($str),
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        );
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // Plain mb_strtoupper() is used unless a language or the
        // "keep string length" mode is requested.
        $use_mb_functions = $lang === null && $try_to_keep_the_string_length === false;

        /**
         * Upper-cases $text with the strategy chosen above.
         *
         * @param string $text
         * @param bool   $clean <p>Passed on as the "clean" flag of self::strtoupper().</p>
         *
         * @return string
         */
        $to_upper = static function (string $text, bool $clean) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
            if ($use_mb_functions !== true) {
                return self::strtoupper($text, $encoding, $clean, $lang, $try_to_keep_the_string_length);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($text);
            }

            return \mb_strtoupper($text, $encoding);
        };

        // Step 2: remove each run of dashes / underscores / whitespace and
        // upper-case the character right after it (if there is one).
        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($to_upper): string {
                return isset($match[1]) ? $to_upper($match[1], false) : '';
            },
            $str
        );

        // Step 3: upper-case each run of numbers together with the character following it.
        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($to_upper, $clean_utf8): string {
                return $to_upper($match[0], $clean_utf8);
            },
            $str
        );
    }
5112
5113
    /**
5114
     * Returns the string with the first letter of each word capitalized,
5115
     * except for when the word is a name which shouldn't be capitalized.
5116
     *
5117
     * @param string $str
5118
     *
5119
     * @return string
5120
     *                <p>A string with $str capitalized.</p>
5121
     */
5122 1
    public static function str_capitalize_name(string $str): string
    {
        // The helper runs twice: first with ' ' as delimiter, then with '-' on
        // the result of the first pass.
        $collapsed = self::collapse_whitespace($str);
        $by_space = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($by_space, '-');
    }
5132
5133
    /**
5134
     * Returns true if the string contains $needle, false otherwise. By default
5135
     * the comparison is case-sensitive, but can be made insensitive by setting
5136
     * $case_sensitive to false.
5137
     *
5138
     * @param string $haystack       <p>The input string.</p>
5139
     * @param string $needle         <p>Substring to look for.</p>
5140
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
5141
     *
5142
     * @return bool whether or not $haystack contains $needle
5143
     */
5144 21
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $case_sensitive = true
    ): bool {
        // Byte-wise strpos() for the exact match, mb_stripos() when case is ignored.
        $position = $case_sensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        // Position 0 is a hit as well, so only "false" means "not found".
        return $position !== false;
    }
5155
5156
    /**
5157
     * Returns true if the string contains all $needles, false otherwise. By
5158
     * default the comparison is case-sensitive, but can be made insensitive by
5159
     * setting $case_sensitive to false.
5160
     *
5161
     * @param string $haystack       <p>The input string.</p>
5162
     * @param array  $needles        <p>SubStrings to look for.</p>
5163
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
5164
     *
5165
     * @return bool whether or not $haystack contains $needle
5166
     */
5167 44
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        // Every needle has to be found. The loop only bails out early on a
        // miss, it must not return on the first hit, otherwise the remaining
        // needles are never looked at. Iteration is by value, so no reference
        // is left dangling after the loop.
        foreach ($needles as $needle) {
            // An empty needle can never be "contained"; "0" is a real needle
            // and must not be mistaken for an empty one.
            if (!$needle && $needle !== '0') {
                return false;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position === false) {
                return false;
            }
        }

        return true;
    }
5191
5192
    /**
5193
     * Returns true if the string contains any $needles, false otherwise. By
5194
     * default the comparison is case-sensitive, but can be made insensitive by
5195
     * setting $case_sensitive to false.
5196
     *
5197
     * @param string $haystack       <p>The input string.</p>
5198
     * @param array  $needles        <p>SubStrings to look for.</p>
5199
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
5200
     *
5201
     * @return bool
5202
     *              Whether or not $str contains $needle
5203
     */
5204 46
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        // The first needle that is found settles the answer. Iteration is by
        // value, so no reference is left dangling after the loop.
        foreach ($needles as $needle) {
            // Empty needles are skipped; "0" is a real needle and must not be
            // mistaken for an empty one.
            if (!$needle && $needle !== '0') {
                continue;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
5234
5235
    /**
5236
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
5237
     * inserted before uppercase characters (with the exception of the first
5238
     * character of the string), and in place of spaces as well as underscores.
5239
     *
5240
     * @param string $str      <p>The input string.</p>
5241
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5242
     *
5243
     * @return string
5244
     */
5245 19
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        // Dasherizing is delimiting with a hyphen as the separator.
        $dash = '-';

        return self::str_delimit($str, $dash, $encoding);
    }
5249
5250
    /**
5251
     * Returns a lowercase and trimmed string separated by the given delimiter.
5252
     * Delimiters are inserted before uppercase characters (with the exception
5253
     * of the first character of the string), and in place of spaces, dashes,
5254
     * and underscores. Alpha delimiters are not converted to lowercase.
5255
     *
5256
     * @param string      $str                           <p>The input string.</p>
5257
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
5258
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
5259
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
5260
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
5261
     *                                                   tr</p>
5262
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
5263
     *                                                   ß</p>
5264
     *
5265
     * @return string
5266
     */
5267 49
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Decide once which regex engine handles both replacement passes.
        $has_mbstring = self::$SUPPORT['mbstring'] === true;

        // Pass 1: put a "-" marker in front of every uppercase letter that is
        // not at a word boundary (so never in front of the first character).
        $trimmed = \trim($str);
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $marked = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
        } else {
            $marked = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
        }

        // Pass 2: lowercase. The plain mb_strtolower() shortcut is taken only
        // for UTF-8 input without language or length-keeping special cases.
        $plain_lowercase = $lang === null
                           && $try_to_keep_the_string_length === false
                           && $encoding === 'UTF-8';
        if ($plain_lowercase) {
            $lowered = \mb_strtolower($marked);
        } else {
            $lowered = self::strtolower($marked, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        // Pass 3: collapse every run of dashes, underscores and whitespace
        // (including the markers from pass 1) into the requested delimiter.
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $lowered);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $lowered);
    }
5301
5302
    /**
5303
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
5304
     *
5305
     * @param string $str <p>The input string.</p>
5306
     *
5307
     * @return false|string
5308
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
5309
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
5310
     */
5311 30
    public static function str_detect_encoding($str)
    {
        // Always work on a string, whatever was passed in.
        $input = (string) $str;

        //
        // Step 1: binary-looking input (UTF-16 / UTF-32 / PDF / images / ...)
        //

        if (self::is_binary($input, true) === true) {
            // Result 1 is reported as little endian, result 2 as big endian.
            $utf32_type = self::is_utf32($input, false);
            if ($utf32_type === 1 || $utf32_type === 2) {
                return $utf32_type === 1 ? 'UTF-32LE' : 'UTF-32BE';
            }

            $utf16_type = self::is_utf16($input, false);
            if ($utf16_type === 1 || $utf16_type === 2) {
                return $utf16_type === 1 ? 'UTF-16LE' : 'UTF-16BE';
            }

            // Binary, but neither UTF-16 nor UTF-32.
            return false;
        }

        //
        // Step 2: plain ASCII
        //

        if (ASCII::is_ascii($input) === true) {
            return 'ASCII';
        }

        //
        // Step 3: valid UTF-8
        //

        if (self::is_utf8_string($input) === true) {
            return 'UTF-8';
        }

        //
        // Step 4: ask "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            // The order matters: the first matching candidate wins.
            $candidates = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($input, $candidates, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // Step 5: brute force via "iconv()"
        //

        // The list of known encodings is loaded lazily and cached statically.
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $round_trip = (string) @\iconv($candidate, $candidate . '//IGNORE', $input);

            // An encoding is accepted when converting to itself changes nothing.
            if ($round_trip === $input) {
                return $candidate;
            }
        }

        return false;
    }
5423
5424
    /**
5425
     * alias for "UTF8::str_ends_with()"
5426
     *
5427
     * @param string $haystack
5428
     * @param string $needle
5429
     *
5430
     * @return bool
5431
     *
5432
     * @see UTF8::str_ends_with()
5433
     * @deprecated <p>please use "UTF8::str_ends_with()"</p>
5434
     */
5435
    public static function str_ends(string $haystack, string $needle): bool
    {
        // Deprecated alias: forwards unchanged to str_ends_with().
        $ends_with_needle = self::str_ends_with($haystack, $needle);

        return $ends_with_needle;
    }
5439
5440
    /**
5441
     * Check if the string ends with the given substring.
5442
     *
5443
     * @param string $haystack <p>The string to search in.</p>
5444
     * @param string $needle   <p>The substring to search for.</p>
5445
     *
5446
     * @return bool
5447
     */
5448 9
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        // An empty needle is a suffix of everything; otherwise a non-empty
        // haystack must end with exactly the bytes of the needle.
        return $needle === ''
               || (
                   $haystack !== ''
                   && \substr($haystack, -\strlen($needle)) === $needle
               );
    }
5460
5461
    /**
5462
     * Returns true if the string ends with any of $substrings, false otherwise.
5463
     *
5464
     * - case-sensitive
5465
     *
5466
     * @param string   $str        <p>The input string.</p>
5467
     * @param string[] $substrings <p>Substrings to look for.</p>
5468
     *
5469
     * @return bool whether or not $str ends with $substring
5470
     */
5471 7
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        // Byte-wise, case-sensitive suffix check; the first hit wins.
        // An empty list simply never enters the loop.
        foreach ($substrings as $candidate) {
            $tail = \substr($str, -\strlen($candidate));

            if ($tail === $candidate) {
                return true;
            }
        }

        return false;
    }
5485
5486
    /**
5487
     * Ensures that the string begins with $substring. If it doesn't, it's
5488
     * prepended.
5489
     *
5490
     * @param string $str       <p>The input string.</p>
5491
     * @param string $substring <p>The substring to add if not present.</p>
5492
     *
5493
     * @return string
5494
     */
5495 10
    public static function str_ensure_left(string $str, string $substring): string
    {
        $prefix_length = \strlen($substring);

        // Already prefixed (byte-wise, case-sensitive)? Then nothing to do.
        $already_prefixed = $prefix_length > 0
                            && \strncmp($str, $substring, $prefix_length) === 0;

        return $already_prefixed ? $str : $substring . $str;
    }
5507
5508
    /**
5509
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
5510
     *
5511
     * @param string $str       <p>The input string.</p>
5512
     * @param string $substring <p>The substring to add if not present.</p>
5513
     *
5514
     * @return string
5515
     */
5516 10
    public static function str_ensure_right(string $str, string $substring): string
    {
        // The suffix counts as present only when both strings are non-empty
        // and the last bytes of $str equal $substring (case-sensitive).
        $already_suffixed = $str !== ''
                            && $substring !== ''
                            && \substr($str, -\strlen($substring)) === $substring;

        if ($already_suffixed) {
            return $str;
        }

        return $str . $substring;
    }
5530
5531
    /**
5532
     * Capitalizes the first word of the string, replaces underscores with
5533
     * spaces, and strips '_id'.
5534
     *
5535
     * @param string $str
5536
     *
5537
     * @return string
5538
     */
5539 3
    public static function str_humanize($str): string
    {
        // First drop every "_id", then turn the remaining underscores into
        // spaces (the order matters: "_id" contains an underscore itself).
        $without_id = \str_replace('_id', '', $str);
        $spaced = \str_replace('_', ' ', $without_id);

        // Trim the result and uppercase its first character.
        return self::ucfirst(\trim($spaced));
    }
5555
5556
    /**
5557
     * alias for "UTF8::str_istarts_with()"
5558
     *
5559
     * @param string $haystack
5560
     * @param string $needle
5561
     *
5562
     * @return bool
5563
     *
5564
     * @see UTF8::str_istarts_with()
5565
     * @deprecated <p>please use "UTF8::str_istarts_with()"</p>
5566
     */
5567
    public static function str_ibegins(string $haystack, string $needle): bool
    {
        // Deprecated alias: forwards unchanged to str_istarts_with().
        $starts_with_needle = self::str_istarts_with($haystack, $needle);

        return $starts_with_needle;
    }
5571
5572
    /**
5573
     * alias for "UTF8::str_iends_with()"
5574
     *
5575
     * @param string $haystack
5576
     * @param string $needle
5577
     *
5578
     * @return bool
5579
     *
5580
     * @see UTF8::str_iends_with()
5581
     * @deprecated <p>please use "UTF8::str_iends_with()"</p>
5582
     */
5583
    public static function str_iends(string $haystack, string $needle): bool
    {
        // Deprecated alias: forwards unchanged to str_iends_with().
        $ends_with_needle = self::str_iends_with($haystack, $needle);

        return $ends_with_needle;
    }
5587
5588
    /**
5589
     * Check if the string ends with the given substring, case-insensitive.
5590
     *
5591
     * @param string $haystack <p>The string to search in.</p>
5592
     * @param string $needle   <p>The substring to search for.</p>
5593
     *
5594
     * @return bool
5595
     */
5596 12
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        // Every string ends with the empty string.
        if ($needle === '') {
            return true;
        }

        // A non-empty needle can never be found in an empty haystack.
        if ($haystack === '') {
            return false;
        }

        // NOTE(review): the tail is cut by byte length. If a character and its
        // other-case form differ in byte length, the cut may land inside a
        // multi-byte character - confirm whether that case matters here.
        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
5608
5609
    /**
5610
     * Returns true if the string ends with any of $substrings, false otherwise.
5611
     *
5612
     * - case-insensitive
5613
     *
5614
     * @param string   $str        <p>The input string.</p>
5615
     * @param string[] $substrings <p>Substrings to look for.</p>
5616
     *
5617
     * @return bool
5618
     *              <p>Whether or not $str ends with $substring.</p>
5619
     */
5620 4
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // The first case-insensitive suffix match wins; an empty list never
        // enters the loop and therefore yields false.
        foreach ($substrings as $candidate) {
            if (self::str_iends_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
5634
5635
    /**
5636
     * Returns the index of the first occurrence of $needle in the string,
5637
     * and false if not found. Accepts an optional offset from which to begin
5638
     * the search.
5639
     *
5640
     * @param string $str      <p>The input string.</p>
5641
     * @param string $needle   <p>Substring to look for.</p>
5642
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
5643
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5644
     *
5645
     * @return false|int
5646
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
5647
     *
5648
     * @see UTF8::stripos()
5649
     * @deprecated <p>please use "UTF8::stripos()"</p>
5650
     */
5651
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Deprecated alias: all arguments are handed to stripos() unchanged.
        $index = self::stripos($str, $needle, $offset, $encoding);

        return $index;
    }
5664
5665
    /**
5666
     * Returns the index of the last occurrence of $needle in the string,
5667
     * and false if not found. Accepts an optional offset from which to begin
5668
     * the search. Offsets may be negative to count from the last character
5669
     * in the string.
5670
     *
5671
     * @param string $str      <p>The input string.</p>
5672
     * @param string $needle   <p>Substring to look for.</p>
5673
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
5674
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5675
     *
5676
     * @return false|int
5677
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
5678
     *
5679
     * @see UTF8::strripos()
5680
     * @deprecated <p>please use "UTF8::strripos()"</p>
5681
     */
5682
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Deprecated alias: all arguments are handed to strripos() unchanged.
        $index = self::strripos($str, $needle, $offset, $encoding);

        return $index;
    }
5695
5696
    /**
5697
     * Returns the index of the first occurrence of $needle in the string,
5698
     * and false if not found. Accepts an optional offset from which to begin
5699
     * the search.
5700
     *
5701
     * @param string $str      <p>The input string.</p>
5702
     * @param string $needle   <p>Substring to look for.</p>
5703
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
5704
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5705
     *
5706
     * @return false|int
5707
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
5708
     *
5709
     * @see UTF8::strpos()
5710
     * @deprecated <p>please use "UTF8::strpos()"</p>
5711
     */
5712 10
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Deprecated alias: all arguments are handed to strpos() unchanged.
        $index = self::strpos($str, $needle, $offset, $encoding);

        return $index;
    }
5725
5726
    /**
5727
     * Returns the index of the last occurrence of $needle in the string,
5728
     * and false if not found. Accepts an optional offset from which to begin
5729
     * the search. Offsets may be negative to count from the last character
5730
     * in the string.
5731
     *
5732
     * @param string $str      <p>The input string.</p>
5733
     * @param string $needle   <p>Substring to look for.</p>
5734
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
5735
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5736
     *
5737
     * @return false|int
5738
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
5739
     *
5740
     * @see UTF8::strrpos()
5741
     * @deprecated <p>please use "UTF8::strrpos()"</p>
5742
     */
5743 10
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Deprecated alias: all arguments are handed to strrpos() unchanged.
        $index = self::strrpos($str, $needle, $offset, $encoding);

        return $index;
    }
5756
5757
    /**
5758
     * Inserts $substring into the string at the $index provided.
5759
     *
5760
     * @param string $str       <p>The input string.</p>
5761
     * @param string $substring <p>String to be inserted.</p>
5762
     * @param int    $index     <p>The index at which to insert the substring.</p>
5763
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
5764
     *
5765
     * @return string
5766
     */
5767 8
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        // The literal 'UTF-8' takes the direct mbstring fast path; any other
        // value is normalized first and handled by the class' own helpers.
        $fast_path = $encoding === 'UTF-8';

        if ($fast_path) {
            $length = (int) \mb_strlen($str);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $length = (int) self::strlen($str, $encoding);
        }

        // An index behind the end of the string leaves the input untouched.
        if ($index > $length) {
            return $str;
        }

        // Split at $index and glue the new part in between.
        if ($fast_path) {
            $head = (string) \mb_substr($str, 0, $index);
            $tail = (string) \mb_substr($str, $index, $length);
        } else {
            $head = (string) self::substr($str, 0, $index, $encoding);
            $tail = (string) self::substr($str, $index, $length, $encoding);
        }

        return $head . $substring . $tail;
    }
5796
5797
    /**
5798
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
5799
     *
5800
     * @see http://php.net/manual/en/function.str-ireplace.php
5801
     *
5802
     * @param mixed $search  <p>
5803
     *                       Every replacement with search array is
5804
     *                       performed on the result of previous replacement.
5805
     *                       </p>
5806
     * @param mixed $replace <p>
5807
     *                       </p>
5808
     * @param mixed $subject <p>
5809
     *                       If subject is an array, then the search and
5810
     *                       replace is performed with every entry of
5811
     *                       subject, and the return value is an array as
5812
     *                       well.
5813
     *                       </p>
5814
     * @param int   $count   [optional] <p>
5815
     *                       The number of matched and replaced needles will
5816
     *                       be returned in count which is passed by
5817
     *                       reference.
5818
     *                       </p>
5819
     *
5820
     * @return mixed a string or an array of replacements
5821
     */
5822 29
    public static function str_ireplace($search, $replace, $subject, &$count = null)
    {
        // Turn every search term into a case-insensitive, UTF-8 aware pattern.
        $patterns = [];
        foreach ((array) $search as $term) {
            $term = (string) $term;

            // An empty needle must never match anything: this pattern cannot
            // succeed (start of subject, preceded by a character, then end).
            $patterns[] = $term === ''
                ? '/^(?<=.)$/'
                : '/' . \preg_quote($term, '/') . '/ui';
        }

        // The replacement has to be inserted literally, exactly like the
        // native str_ireplace() does. Without escaping, preg_replace() would
        // interpret "$1", "${1}" or "\1" inside it as back-references.
        $escape = static function ($value): string {
            return \addcslashes((string) $value, '\\$');
        };
        $replacements = \is_array($replace)
            ? \array_map($escape, $replace)
            : $escape($replace);

        // Collect the number of replacements in a dedicated variable and hand
        // it to the caller through the by-reference parameter.
        $replaced = 0;
        $result = \preg_replace($patterns, $replacements, $subject, -1, $replaced);
        $count = $replaced;

        return $result;
    }
5841
5842
    /**
5843
     * Replaces $search at the beginning of the string with $replacement (case-insensitive match).
5844
     *
5845
     * @param string $str         <p>The input string.</p>
5846
     * @param string $search      <p>The string to search for.</p>
5847
     * @param string $replacement <p>The replacement.</p>
5848
     *
5849
     * @return string string after the replacements
5850
     */
5851 17
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        // Without a search term the replacement is simply appended. This also
        // covers an empty input, which then yields the bare replacement.
        if ($search === '') {
            return $str . $replacement;
        }

        // Swap the prefix only when $search sits at byte offset 0.
        $starts_with_search = \stripos($str, $search) === 0;
        if (!$starts_with_search) {
            return $str;
        }

        return $replacement . \substr($str, \strlen($search));
    }
5873
5874
    /**
5875
     * Replaces $search at the end of the string with $replacement (case-insensitive match).
5876
     *
5877
     * @param string $str         <p>The input string.</p>
5878
     * @param string $search      <p>The string to search for.</p>
5879
     * @param string $replacement <p>The replacement.</p>
5880
     *
5881
     * @return string
5882
     *                <p>string after the replacements.</p>
5883
     */
5884 17
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        // Without a search term the replacement is simply appended. This also
        // covers an empty input, which then yields the bare replacement.
        if ($search === '') {
            return $str . $replacement;
        }

        $str_length = \strlen($str);
        $search_length = \strlen($search);

        // A search term longer than the input can never be its ending. The
        // guard also keeps the offset below inside the string: an offset
        // outside of it makes stripos() emit a warning (PHP 7) or throw a
        // ValueError (PHP 8).
        if ($search_length > $str_length) {
            return $str;
        }

        // Start the search exactly where the ending would have to begin, so
        // any hit is necessarily a match at the very end of the string.
        if (\stripos($str, $search, $str_length - $search_length) !== false) {
            return \substr($str, 0, -$search_length) . $replacement;
        }

        return $str;
    }
5906
5907
    /**
5908
     * Check if the string starts with the given substring, case-insensitive.
5909
     *
5910
     * @param string $haystack <p>The string to search in.</p>
5911
     * @param string $needle   <p>The substring to search for.</p>
5912
     *
5913
     * @return bool
5914
     */
5915 12
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        // An empty needle is a prefix of everything; otherwise the haystack
        // must be non-empty and the case-insensitive match must sit at index 0.
        return $needle === ''
               || (
                   $haystack !== ''
                   && self::stripos($haystack, $needle) === 0
               );
    }
5927
5928
    /**
5929
     * Returns true if the string begins with any of $substrings, false otherwise.
5930
     *
5931
     * - case-insensitive
5932
     *
5933
     * @param string $str        <p>The input string.</p>
5934
     * @param array  $substrings <p>Substrings to look for.</p>
5935
     *
5936
     * @return bool whether or not $str starts with $substring
5937
     */
5938 4
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        // An empty input never matches, not even against an empty substring.
        if ($str === '') {
            return false;
        }

        // The first case-insensitive prefix match wins; an empty list never
        // enters the loop and therefore yields false.
        foreach ($substrings as $candidate) {
            if (self::str_istarts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
5956
5957
    /**
5958
     * Gets the substring after the first occurrence of a separator (case-insensitive match).
5959
     *
5960
     * @param string $str       <p>The input string.</p>
5961
     * @param string $separator <p>The string separator.</p>
5962
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
5963
     *
5964
     * @return string
5965
     */
5966 1
    public static function str_isubstr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to search for, or nothing to search in.
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = self::stripos($str, $separator);
            if ($offset === false) {
                return '';
            }

            // Skip the separator itself and return everything behind it.
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        // The offset must be computed in the same encoding that is used for
        // the slicing below, otherwise the two positions do not line up.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
5994
5995
    /**
5996
     * Gets the substring after the last occurrence of a separator (case-insensitive match).
5997
     *
5998
     * @param string $str       <p>The input string.</p>
5999
     * @param string $separator <p>The string separator.</p>
6000
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6001
     *
6002
     * @return string
6003
     */
6004 1
    public static function str_isubstr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to search for, or nothing to search in.
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = self::strripos($str, $separator);
            if ($offset === false) {
                return '';
            }

            // Skip the separator itself and return everything behind it.
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        // The offset must be computed in the same encoding that is used for
        // the slicing below, otherwise the two positions do not line up.
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6032
6033
    /**
6034
     * Gets the substring before the first occurrence of a separator (case-insensitive match).
6035
     *
6036
     * @param string $str       <p>The input string.</p>
6037
     * @param string $separator <p>The string separator.</p>
6038
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6039
     *
6040
     * @return string
6041
     */
6042 1
    public static function str_isubstr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to search for, or nothing to search in.
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = self::stripos($str, $separator);
            if ($offset === false) {
                return '';
            }

            // Everything in front of the separator.
            return (string) \mb_substr($str, 0, $offset);
        }

        // The offset must be computed in the same encoding that is used for
        // the slicing below, otherwise the two positions do not line up.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6062
6063
    /**
6064
     * Gets the substring before the last occurrence of a separator (case-insensitive match).
6065
     *
6066
     * @param string $str       <p>The input string.</p>
6067
     * @param string $separator <p>The string separator.</p>
6068
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
6069
     *
6070
     * @return string
6071
     */
6072 1
    public static function str_isubstr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to search for, or nothing to search in.
        if ($separator === '' || $str === '') {
            return '';
        }

        // The literal 'UTF-8' uses mbstring directly; every other encoding is
        // handled by the class' own helpers.
        $fast_path = $encoding === 'UTF-8';

        // Position of the last case-insensitive occurrence of the separator.
        $last_position = $fast_path
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);

        if ($last_position === false) {
            return '';
        }

        // Everything in front of that occurrence.
        if ($fast_path) {
            return (string) \mb_substr($str, 0, $last_position);
        }

        return (string) self::substr($str, 0, $last_position, $encoding);
    }
6097
6098
    /**
6099
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
6100
     *
6101
     * @param string $str           <p>The input string.</p>
6102
     * @param string $needle        <p>The string to look for.</p>
6103
     * @param bool   $before_needle [optional] <p>Default: false</p>
6104
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
6105
     *
6106
     * @return string
6107
     */
6108 2
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to search for, or nothing to search in.
        if ($needle === '' || $str === '') {
            return '';
        }

        // stristr() reports "not found" as false; this method reports it as
        // an empty string instead.
        $found = self::stristr($str, $needle, $before_needle, $encoding);

        return $found === false ? '' : $found;
    }
6134
6135
    /**
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
     *
     * The lookup itself is delegated to "UTF8::strrichr()"; this wrapper only
     * guarantees that a string is returned: empty input, an empty needle or a
     * failed lookup all result in ''.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Passed through to "UTF8::strrichr()". Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>The part reported by "UTF8::strrichr()" or an empty string.</p>
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to search in, or nothing to search for
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::strrichr($str, $needle, $before_needle, $encoding);

        // "false" signals "needle not found"
        return $found === false ? '' : $found;
    }
6171
6172
    /**
     * Returns the last $n characters of the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end. Values <= 0 yield ''.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The trailing characters, or '' for an empty input / non-positive $n.</p>
     */
    public static function str_last_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        if ($n <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            // any other charset goes through the encoding-aware wrapper
            $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $normalized_encoding);
        }

        // fast path: a negative start counts from the end of the string
        return (string) \mb_substr($str, -$n);
    }
6198
6199
    /**
     * Limit the number of characters in a string.
     *
     * A string that already fits into $length is returned unchanged. Otherwise
     * the string is cut so that the kept text plus "$str_add_on" is $length
     * characters long. If "$str_add_on" alone is longer than $length, no text is
     * kept and only "$str_add_on" is returned.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100. Values <= 0 yield ''.</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Clamp to 0: a negative length would make mb_substr() cut relative
            // to the END of the string and return far more than $length characters.
            $keep = \max(0, $length - (int) self::strlen($str_add_on));

            return ((string) \mb_substr($str, 0, $keep)) . $str_add_on;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // same clamp as in the UTF-8 fast path above
        $keep = \max(0, $length - (int) self::strlen($str_add_on));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
    }
6236
6237
    /**
     * Limit the number of characters in a string without cutting a word in half.
     *
     * Behaviour for a string longer than $length:
     * - if the character at position ($length - 1) is a space, the text before
     *   that space is kept;
     * - otherwise the string is cut to $length characters and everything after
     *   the last space of that cut is dropped;
     * - if that leaves nothing (no usable space in the cut), the first
     *   ($length - 1) characters are kept instead.
     * "$str_add_on" is appended in all three cases.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100. Values <= 0 yield ''.</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($length <= 0 || $str === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        if ($is_utf8) {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // the limit falls exactly on a space: cut right before it
            if (\mb_substr($str, $length - 1, 1) === ' ') {
                return ((string) \mb_substr($str, 0, $length - 1)) . $str_add_on;
            }

            $truncated = \mb_substr($str, 0, $length);
        } else {
            if ((int) self::strlen($str, $encoding) <= $length) {
                return $str;
            }

            // the limit falls exactly on a space: cut right before it
            if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
                return ((string) self::substr($str, 0, $length - 1, $encoding)) . $str_add_on;
            }

            $truncated = self::substr($str, 0, $length, $encoding);
            if ($truncated === false) {
                return '' . $str_add_on;
            }
        }

        // drop the (possibly incomplete) last word: keep everything before the last space
        $last_space = \strrpos($truncated, ' ');
        $whole_words = $last_space === false ? '' : \substr($truncated, 0, $last_space);

        if ($whole_words !== '') {
            return $whole_words . $str_add_on;
        }

        // no complete word left: fall back to a hard cut one character short of the limit
        if ($is_utf8) {
            return ((string) \mb_substr($truncated, 0, $length - 1)) . $str_add_on;
        }

        return ((string) self::substr($truncated, 0, $length - 1, $encoding)) . $str_add_on;
    }
6303
6304
    /**
     * Returns the longest common prefix between the $str1 and $str2.
     *
     * Both strings are compared character by character from the start; the
     * comparison stops at the first mismatch or at the end of the shorter string.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shared prefix, '' if there is none.</p>
     */
    public static function str_longest_common_prefix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        $prefix = '';

        if ($encoding === 'UTF-8') {
            $limit = (int) \min(\mb_strlen($str1), \mb_strlen($str2));

            $i = 0;
            while ($i < $limit) {
                $char = \mb_substr($str1, $i, 1);

                if ($char === false || $char !== \mb_substr($str2, $i, 1)) {
                    break;
                }

                $prefix .= $char;
                ++$i;
            }

            return $prefix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        $i = 0;
        while ($i < $limit) {
            $char = self::substr($str1, $i, 1, $encoding);

            if ($char === false || $char !== self::substr($str2, $i, 1, $encoding)) {
                break;
            }

            $prefix .= $char;
            ++$i;
        }

        return $prefix;
    }
6365
6366
    /**
     * Returns the longest common substring between the $str1 and $str2.
     * In the case of ties, it returns that which occurs first.
     *
     * Uses dynamic programming, see
     * http://en.wikipedia.org/wiki/Longest_common_substring_problem
     *
     * Every character is extracted exactly once per string (instead of once per
     * compared pair), and only two rows of the DP table are kept in memory.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The longest common substring, '' if there is none.</p>
     */
    public static function str_longest_common_substring(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str1);
            $other_length = (int) \mb_strlen($str2);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str_length = (int) self::strlen($str1, $encoding);
            $other_length = (int) self::strlen($str2, $encoding);
        }

        // Return if either string is empty
        if ($str_length === 0 || $other_length === 0) {
            return '';
        }

        // decided AFTER the normalization above, so aliases of UTF-8 use the mb_* path as well
        $use_mb = $encoding === 'UTF-8';

        // split both strings into characters up front; substr calls are the expensive part
        $str_chars = [];
        for ($i = 0; $i < $str_length; ++$i) {
            $str_chars[] = $use_mb
                ? \mb_substr($str1, $i, 1)
                : self::substr($str1, $i, 1, $encoding);
        }

        $other_chars = [];
        for ($j = 0; $j < $other_length; ++$j) {
            $other_chars[] = $use_mb
                ? \mb_substr($str2, $j, 1)
                : self::substr($str2, $j, 1, $encoding);
        }

        $len = 0; // length of the best match so far
        $end = 0; // 1-based end position of that match inside $str1

        // $previous_row[$j] = length of the common run ending at ($i - 1, $j)
        $previous_row = \array_fill(0, $other_length + 1, 0);

        for ($i = 1; $i <= $str_length; ++$i) {
            $current_row = [0];
            $str_char = $str_chars[$i - 1];

            for ($j = 1; $j <= $other_length; ++$j) {
                if ($str_char === $other_chars[$j - 1]) {
                    $run = $previous_row[$j - 1] + 1;
                    $current_row[$j] = $run;

                    // strict ">" keeps the first match on ties
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $current_row[$j] = 0;
                }
            }

            $previous_row = $current_row;
        }

        if ($use_mb) {
            return (string) \mb_substr($str1, $end - $len, $len);
        }

        return (string) self::substr($str1, $end - $len, $len, $encoding);
    }
6454
6455
    /**
     * Returns the longest common suffix between the $str1 and $str2.
     *
     * Both strings are compared character by character from the end; the
     * comparison stops at the first mismatch or at the start of the shorter string.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shared suffix, '' if there is none.</p>
     */
    public static function str_longest_common_suffix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        $suffix = '';

        if ($encoding === 'UTF-8') {
            $limit = (int) \min(
                \mb_strlen($str1, $encoding),
                \mb_strlen($str2, $encoding)
            );

            $i = 1;
            while ($i <= $limit) {
                // negative start: the $i-th character counted from the end
                $char = \mb_substr($str1, -$i, 1);

                if ($char === false || $char !== \mb_substr($str2, -$i, 1)) {
                    break;
                }

                $suffix = $char . $suffix;
                ++$i;
            }

            return $suffix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        $i = 1;
        while ($i <= $limit) {
            // negative start: the $i-th character counted from the end
            $char = self::substr($str1, -$i, 1, $encoding);

            if ($char === false || $char !== self::substr($str2, -$i, 1, $encoding)) {
                break;
            }

            $suffix = $char . $suffix;
            ++$i;
        }

        return $suffix;
    }
6519
6520
    /**
     * Returns true if $str matches the supplied pattern, false otherwise.
     *
     * The pattern is wrapped in "/" delimiters and evaluated with the "u"
     * modifier, so it must be passed without delimiters and any literal "/"
     * inside it has to be escaped by the caller.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against.</p>
     *
     * @return bool whether or not $str matches the pattern
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        $regex = '/' . $pattern . '/u';

        // preg_match() yields 1 on a match, 0 on no match and false on error
        return \preg_match($regex, $str) === 1;
    }
6532
6533
    /**
     * Returns whether or not a character exists at an index. Offsets may be
     * negative to count from the last character in the string. Implements
     * part of the ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool whether or not the index exists
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $length = (int) self::strlen($str, $encoding);

        // a negative offset -k addresses the k-th character from the end,
        // so it is valid as long as the string has at least k characters
        return $offset < 0
            ? $length >= \abs($offset)
            : $length > $offset;
    }
6555
6556
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * The bounds check is delegated to "UTF8::str_offset_exists()", so the
     * string is measured in the same "$encoding" that is used to fetch the
     * character.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string
     *                <p>The character at the specified index.</p>
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        if (!self::str_offset_exists($str, $index, $encoding)) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6586
6587
    /**
     * Pad a UTF-8 string to a given length with another string.
     *
     * The input is returned unchanged if "$pad_length" is 0, if "$pad_string" is
     * empty or if the string is already longer than "$pad_length". For
     * STR_PAD_BOTH an odd amount of padding puts the extra character on the right.
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if "$pad_type" is neither an integer nor one of the string aliases
     *
     * @return string
     *                <p>Returns the padded string.</p>
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_string === '' || $pad_length === 0) {
            return $str;
        }

        // translate the string aliases into the native STR_PAD_* constants
        if ($pad_type !== (int) $pad_type) {
            $aliases = [
                'left'  => \STR_PAD_LEFT,
                'right' => \STR_PAD_RIGHT,
                'both'  => \STR_PAD_BOTH,
            ];

            if (!\is_string($pad_type) || !isset($aliases[$pad_type])) {
                throw new \InvalidArgumentException(
                    'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }

            $pad_type = $aliases[$pad_type];
        }

        // the fast path is chosen on the encoding exactly as passed in by the caller
        $use_mb = $encoding === 'UTF-8';

        if ($use_mb) {
            $str_length = (int) \mb_strlen($str);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $str_length = (int) self::strlen($str, $encoding);
        }

        // already long enough: nothing to pad
        if ($pad_length < $str_length) {
            return $str;
        }

        // number of padding characters that have to be added in total
        $diff = $pad_length - $str_length;

        // character count of a string in the active encoding
        $measure = static function (string $text) use ($use_mb, $encoding): int {
            return $use_mb
                ? (int) \mb_strlen($text)
                : (int) self::strlen($text, $encoding);
        };

        // the first $chars characters of a string in the active encoding
        $head = static function (string $text, int $chars) use ($use_mb, $encoding): string {
            return $use_mb
                ? (string) \mb_substr($text, 0, $chars)
                : (string) self::substr($text, 0, $chars, $encoding);
        };

        $pre = '';
        $post = '';

        if ($pad_type === \STR_PAD_LEFT) {
            // repeat the pad string often enough, then trim it to the exact size
            $pre = $head(
                \str_repeat($pad_string, (int) \ceil($diff / $measure($pad_string))),
                $diff
            );
        } elseif ($pad_type === \STR_PAD_BOTH) {
            $left_length = (int) \floor($diff / 2);
            $right_length = (int) \ceil($diff / 2);

            $pre = $head(\str_repeat($pad_string, $left_length), $left_length);
            $post = $head(\str_repeat($pad_string, $right_length), $right_length);
        } else {
            // STR_PAD_RIGHT and every other integer value
            $post = $head(
                \str_repeat($pad_string, (int) \ceil($diff / $measure($pad_string))),
                $diff
            );
        }

        return $pre . $str . $post;
    }
6750
6751
    /**
     * Returns a new string of a given length such that both sides of the
     * string are padded. Alias for "UTF8::str_pad()" with a $pad_type of 'both'.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The string with padding applied.</p>
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_BOTH, $encoding);
    }
6777
6778
    /**
     * Returns a new string of a given length such that the beginning of the
     * string is padded. Alias for "UTF8::str_pad()" with a $pad_type of 'left'.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The string with left padding.</p>
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_LEFT, $encoding);
    }
6804
6805
    /**
     * Returns a new string of a given length such that the end of the string
     * is padded. Alias for "UTF8::str_pad()" with a $pad_type of 'right'.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The string with right padding.</p>
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_RIGHT, $encoding);
    }
6831
6832
    /**
     * Repeat a string.
     *
     * The input is passed through "UTF8::filter()" first and the filtered
     * result is then handed to the native "str_repeat()".
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of times the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @return string
     *                <p>The repeated string.</p>
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
6857
6858
    /**
     * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param mixed $search  <p>
     *                       The value being searched for, otherwise known as the needle.
     *                       An array may be used to designate multiple needles.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value that replaces found search
     *                       values. An array may be used to designate multiple replacements.
     *                       </p>
     * @param mixed $subject <p>
     *                       The string or array being searched and replaced on,
     *                       otherwise known as the haystack.
     *                       </p>
     *                       <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles
     *
     * @return mixed this function returns a string or an array with the replaced values
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        // all arguments, including the by-reference counter, go straight to the native function
        /**
         * @psalm-suppress PossiblyNullArgument
         */
        return \str_replace($search, $replace, $subject, $count);
    }
6903
6904
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * The comparison is byte-wise and case-sensitive. An empty $search does
     * not match a prefix; in that case $replacement is appended to $str.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_beginning(
        string $str,
        string $search,
        string $replacement
    ): string {
        if ($search === '') {
            // also covers an empty $str, where the result is just $replacement
            return $str . $replacement;
        }

        $search_length = \strlen($search);

        // prefix test: the first strlen($search) bytes have to be identical
        if (\strncmp($str, $search, $search_length) === 0) {
            return $replacement . \substr($str, $search_length);
        }

        return $str;
    }
6939
6940
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * The comparison is byte-wise and case-sensitive. An empty $search does
     * not match a suffix; in that case $replacement is appended to $str.
     * A $search that is longer than $str can never match, so $str is returned
     * unchanged.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_ending(
        string $str,
        string $search,
        string $replacement
    ): string {
        if ($search === '') {
            // also covers an empty $str, where the result is just $replacement
            return $str . $replacement;
        }

        $search_length = \strlen($search);

        // The length guard comes first: searching with an offset derived from
        // (strlen($str) - strlen($search)) would be out of range for a short or
        // empty $str, which is a warning on PHP 7 and a ValueError on PHP 8.
        if (
            $search_length <= \strlen($str)
            &&
            \substr($str, -$search_length) === $search
        ) {
            return \substr($str, 0, -$search_length) . $replacement;
        }

        return $str;
    }
6975
6976
    /**
     * Replace the first "$search"-term with the "$replace"-term.
     *
     * The position is looked up via "UTF8::strpos()"; if nothing is found the
     * subject is returned unchanged.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The replacement for the first hit.</p>
     * @param string $subject <p>The string that is searched.</p>
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(
        string $search,
        string $replace,
        string $subject
    ): string {
        $pos = self::strpos($subject, $search);

        // no hit: nothing to replace
        if ($pos === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $pos, (int) self::strlen($search));
    }
7008
7009
    /**
     * Replace only the last occurrence of "$search" in "$subject" with "$replace".
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The term that is inserted instead.</p>
     * @param string $subject <p>The string that is searched.</p>
     *
     * @return string
     *                <p>The subject with the last hit replaced, or the unchanged subject if there is no hit.</p>
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $last_position = self::strrpos($subject, $search);

        // no hit -> nothing to replace
        if ($last_position === false) {
            return $subject;
        }

        $search_length = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $last_position, $search_length);
    }
7040
7041
    /**
     * Shuffles all the characters in the string.
     *
     * PS: uses a random algorithm which is weak for cryptography purposes
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shuffled string.</p>
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        // Nothing to shuffle; also keeps range() from being asked for "0 .. -1".
        if ($str === '') {
            return '';
        }

        // The fast path is chosen from the encoding exactly as passed in.
        $is_utf8 = $encoding === 'UTF-8';
        if ($is_utf8) {
            $length = (int) \mb_strlen($str);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $length = (int) self::strlen($str, $encoding);
        }

        // If the length could not be determined there is nothing safe to reorder.
        if ($length < 1) {
            return $str;
        }

        $indexes = \range(0, $length - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($indexes);

        // init
        $shuffled_str = '';

        foreach ($indexes as $index) {
            if ($is_utf8) {
                $tmp_sub_str = \mb_substr($str, $index, 1);
            } else {
                $tmp_sub_str = self::substr($str, $index, 1, $encoding);
            }

            if ($tmp_sub_str !== false) {
                $shuffled_str .= $tmp_sub_str;
            }
        }

        return $shuffled_str;
    }
7088
7089
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. Negative values for $start and $end are computed
     * from the end of the string.
     *
     * @param string $str
     * @param int    $start    <p>Initial index from which to begin extraction.</p>
     * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      <p>The extracted substring; an empty string if the resolved $end is not
     *                      after the resolved $start.</p><p>For non "UTF-8" encodings the result of
     *                      the underlying substr() call is passed through, which may be <b>FALSE</b>.</p>
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        // The fast path is chosen from the encoding exactly as passed in.
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // The string length is only needed to resolve a relative (negative) index
        // or an omitted $end.
        $str_length = 0;
        if ($start < 0 || $end === null || $end < 0) {
            $str_length = $is_utf8
                ? (int) \mb_strlen($str)
                : (int) self::strlen($str, $encoding);
        }

        // Resolve a negative start into an absolute index, so that the
        // "$end - $start" arithmetic below works on comparable values.
        if ($start < 0) {
            $start = \max(0, $str_length + $start);
        }

        if ($end === null) {
            // "rest of the string": an over-long length is cut off by substr()
            $length = $str_length;
        } else {
            if ($end < 0) {
                $end += $str_length;
            }

            // also covers a negative $end that points before the start of the string
            if ($end <= $start) {
                return '';
            }

            $length = $end - $start;
        }

        if ($is_utf8) {
            return \mb_substr($str, $start, $length);
        }

        return self::substr($str, $start, $length, $encoding);
    }
7138
7139
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Dashes and whitespace become "_", every upper-case letter is lower-cased and
     * prefixed with "_", ASCII digits are wrapped in "_", and repeated or
     * leading / trailing underscores are removed afterwards.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>A string in snake_case.</p>
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // NOTE: no "|" inside the character class - there it would be a literal
            // pipe and every "|" in the input would be treated like an upper-case letter.
            '/([\\p{N}\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $match_int = (int) $match;

                // the round-trip through int only succeeds for a single ASCII digit
                if ((string) $match_int === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        // The patterns are applied one after the other. Once all whitespace is
        // converted there is nothing left for a separate "trim whitespace" pattern.
        $str = (string) \preg_replace(
            [
                '/\\s+/u', // convert spaces to "_"
                '/_+/',    // remove double "_"
            ],
            [
                '_',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
7204
7205
    /**
     * Sort all characters of a string by their code point.
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @return string
     *                <p>A string of sorted characters.</p>
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $code_points = self::codepoints($str);

        if ($unique) {
            // flipping twice collapses repeated code points into a single entry
            $code_points = \array_flip(\array_flip($code_points));
        }

        if (!$desc) {
            \asort($code_points);
        } else {
            \arsort($code_points);
        }

        return self::string($code_points);
    }
7231
7232
    /**
     * Convert a string to an array of Unicode characters.
     *
     * If an array is passed, every element is split on its own and the keys are kept.
     *
     * @param int|int[]|string|string[] $str                     <p>The string to split into array.</p>
     * @param int                       $length                  [optional] <p>Max character length of each array
     *                                                           element.</p>
     * @param bool                      $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool                      $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                                           "mb_substr"</p>
     *
     * @return array
     *                  <p>An array containing chunks of the input; an empty array for an empty
     *                  string or a $length lower than 1.</p>
     */
    public static function str_split(
        $str,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        if (\is_array($str) === true) {
            // Write back via the key instead of iterating by reference,
            // so no reference is left dangling after the loop.
            foreach ($str as $key => $value) {
                $str[$key] = self::str_split(
                    $value,
                    $length,
                    $clean_utf8,
                    $try_to_use_mb_functions
                );
            }

            return $str;
        }

        // init
        $str = (string) $str;

        if ($str === '') {
            return [];
        }

        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        if (
            $try_to_use_mb_functions === true
            &&
            self::$SUPPORT['mbstring'] === true
        ) {
            // native implementation, already handles $length
            if (Bootup::is_php('7.4')) {
                $return = \mb_str_split($str, $length);
                if ($return !== false) {
                    return $return;
                }
            }

            $i_max = \mb_strlen($str);
            if ($i_max <= 127) {
                $ret = [];
                for ($i = 0; $i < $i_max; ++$i) {
                    $ret[] = \mb_substr($str, $i, 1);
                }
            } else {
                $return_array = [];
                \preg_match_all('/./us', $str, $return_array);
                $ret = $return_array[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $return_array = [];
            \preg_match_all('/./us', $str, $return_array);
            $ret = $return_array[0] ?? [];
        } else {

            // fallback: walk the bytes and group them by the UTF-8 lead-byte pattern,
            // bytes that do not form a valid 1-4 byte sequence are skipped

            $ret = [];
            $len = \strlen($str);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($str[$i] & "\x80") === "\x00") {
                    // 0xxxxxxx: single byte (ASCII)
                    $ret[] = $str[$i];
                } elseif (
                    isset($str[$i + 1])
                    &&
                    ($str[$i] & "\xE0") === "\xC0"
                ) {
                    // 110xxxxx: lead byte of a 2 byte sequence
                    if (($str[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $str[$i] . $str[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($str[$i + 2])
                    &&
                    ($str[$i] & "\xF0") === "\xE0"
                ) {
                    // 1110xxxx: lead byte of a 3 byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($str[$i + 3])
                    &&
                    ($str[$i] & "\xF8") === "\xF0"
                ) {
                    // 11110xxx: lead byte of a 4 byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            $ret = \array_chunk($ret, $length);

            // array_map() hands the chunks over by value, so the callback
            // must not declare its parameter by reference.
            return \array_map(
                static function (array $item): string {
                    return \implode('', $item);
                },
                $ret
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
7378
7379
    /**
     * Splits the string with the provided regular expression, returning an
     * array of strings. An optional integer $limit will truncate the
     * results.
     *
     * @param string $str
     * @param string $pattern <p>The regex (without delimiters) with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @return string[]
     *                  <p>An array of strings; an empty array if $limit is 0 or the split fails.</p>
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $parts = \mb_split($pattern, $str);
        } else {
            // $pattern is a regex (as in the mb_split() branch), so it must not be
            // quoted as a literal. Only "/" chars that are not already escaped are
            // escaped, because "/" is used as the delimiter here.
            $delimiter_safe_pattern = \preg_replace(
                '#(?<!\\\\)(?:\\\\\\\\)*\\K/#',
                '\\\\/',
                $pattern
            );
            if ($delimiter_safe_pattern === null) {
                return [];
            }

            $parts = \preg_split('/' . $delimiter_safe_pattern . '/u', $str);
        }

        // both split functions report an error (e.g. an invalid pattern) via false
        if ($parts === false) {
            return [];
        }

        // the limit truncates the result list, the rest of the string is dropped
        if ($limit > 0) {
            return \array_slice($parts, 0, $limit);
        }

        return $parts;
    }
7441
7442
    /**
     * Check if the string starts with the given substring (byte-wise, case-sensitive).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     *              <p>True if $haystack begins with $needle; an empty $needle always matches.</p>
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        // every string starts with the empty string
        if ($needle === '') {
            return true;
        }

        // ... but an empty string starts with nothing else
        if ($haystack === '') {
            return false;
        }

        // compare exactly the leading bytes that the needle would occupy
        $needle_length = \strlen($needle);

        return \strncmp($haystack, $needle, $needle_length) === 0;
    }
7462
7463
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with one of the $substrings
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        // nothing to check against, or nothing to check
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_starts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
7491
7492
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>Everything behind the first separator; an empty string if the
     *                input or the separator is empty, or the separator was not found.</p>
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // Use the class' own substr() here, like the sibling "str_substr_*_separator"
        // methods, instead of calling mb_substr() directly with the caller's raw
        // encoding name.
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
7531
7532
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>Everything behind the last separator; an empty string if the
     *                input or the separator is empty, or the separator was not found.</p>
     */
    public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        $last_position = $is_utf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);

        // separator not found
        if ($last_position === false) {
            return '';
        }

        if ($is_utf8) {
            $tail_start = $last_position + (int) \mb_strlen($separator);

            return (string) \mb_substr($str, $tail_start);
        }

        $tail_start = $last_position + (int) self::strlen($separator, $encoding);

        return (string) self::substr($str, $tail_start, null, $encoding);
    }
7571
7572
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>Everything in front of the first separator; an empty string if the
     *                input or the separator is empty, or the separator was not found.</p>
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        $first_position = $is_utf8
            ? \mb_strpos($str, $separator)
            : self::strpos($str, $separator, 0, $encoding);

        // separator not found
        if ($first_position === false) {
            return '';
        }

        if ($is_utf8) {
            return (string) \mb_substr($str, 0, $first_position);
        }

        return (string) self::substr($str, 0, $first_position, $encoding);
    }
7615
7616
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>Everything in front of the last separator; an empty string if the
     *                input or the separator is empty, or the separator was not found.</p>
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        $last_position = $is_utf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);

        // separator not found
        if ($last_position === false) {
            return '';
        }

        if ($is_utf8) {
            return (string) \mb_substr($str, 0, $last_position);
        }

        // the encoding name is only normalized for the final substr() call
        $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr($str, 0, $last_position, $normalized_encoding);
    }
7658
7659
    /**
     * Gets the part of the string starting at the first occurrence of "$needle"
     * (or the part in front of it, via "$before_needle").
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>The found part; an empty string if the input or the needle is
     *                empty, or the needle was not found.</p>
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $found = self::strstr($str, $needle, $before_needle, $encoding);
        } else {
            // the third argument defaults to false, so the flag can be passed through as it is
            $found = \mb_strstr($str, $needle, $before_needle);
        }

        if ($found === false) {
            return '';
        }

        return $found;
    }
7703
7704
    /**
     * Gets the part of the string starting at the last occurrence of "$needle"
     * (or the part in front of it, via "$before_needle").
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>The found part; an empty string if the input or the needle is
     *                empty, or the needle was not found.</p>
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $found = self::strrchr($str, $needle, $before_needle, $encoding);
        } else {
            // the third argument defaults to false, so the flag can be passed through as it is
            $found = \mb_strrchr($str, $needle, $before_needle);
        }

        if ($found === false) {
            return '';
        }

        return $found;
    }
7748
7749
    /**
     * Wraps $str in the given substring.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @return string
     *                <p>A string with the substring both prepended and appended.</p>
     */
    public static function str_surround(string $str, string $substring): string
    {
        // substring + input + substring
        return \implode('', [$substring, $str, $substring]);
    }
7762
7763
    /**
     * Returns a string with the first letter of each word capitalized and the
     * rest of the word lower-cased. The input is trimmed first, unless
     * $use_trim_first is disabled. Also accepts an array, $ignore, allowing you
     * to list words not to be capitalized.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                        [optional] <p>An array of words not to capitalize or null.
     *                                                           Default: null</p>
     * @param string              $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null         $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                           tr</p>
     * @param bool                $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                           ß</p>
     * @param bool                $use_trim_first                [optional] <p>true === trim the input string, first</p>
     * @param string|null         $word_define_chars             [optional] <p>A string of chars that separate words, in
     *                                                           addition to whitespace.</p>
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false,
        bool $use_trim_first = true,
        string $word_define_chars = null
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($use_trim_first === true) {
            $str = \trim($str);
        }

        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        // the plain "mb_*" functions know nothing about language special cases
        $use_mb_functions = $lang === null && $try_to_keep_the_string_length === false;

        // Compare explicitly instead of relying on truthiness: "0" is a valid
        // separator char, but it is a falsy string in PHP.
        if ($word_define_chars !== null && $word_define_chars !== '') {
            // the chars end up inside a regex character class
            $word_define_chars = \preg_quote($word_define_chars, '/');
        } else {
            $word_define_chars = '';
        }

        $str = (string) \preg_replace_callback(
            // a word === a run of chars that are neither whitespace nor a separator char
            '/([^\\s' . $word_define_chars . ']+)/u',
            static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                    return $match[0];
                }

                if ($use_mb_functions === true) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                               . \mb_strtolower(\mb_substr($match[0], 1));
                    }

                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
                }

                return self::ucfirst(
                    self::strtolower(
                        $match[0],
                        $encoding,
                        false,
                        $lang,
                        $try_to_keep_the_string_length
                    ),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            },
            $str
        );

        return $str;
    }
7853
7854
    /**
     * Convert a string into trimmed, human-friendly title case.
     *
     * Small words (e.g. "a", "of", "the", "vs.") are lower-cased unless they
     * start or end the title / a sub-phrase, or are part of a hyphenated
     * compound. URLs, domains, e-mail addresses, file paths and words with
     * internal capitals are left untouched.
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>Additional words not to capitalize. They are appended to the small-word
     *                         alternation of the regular expressions as-is (no escaping is applied).</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize_for_humans(
        string $str,
        array $ignore = [],
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        $small_words = [
            '(?<!q&)a',
            'an',
            'and',
            'as',
            'at(?!&t)',
            'but',
            'by',
            'en',
            'for',
            'if',
            'in',
            'of',
            'on',
            'or',
            'the',
            'to',
            'v[.]?',
            'via',
            'vs[.]?',
        ];

        if ($ignore !== []) {
            $small_words = \array_merge($small_words, $ignore);
        }

        $small_words_rx = \implode('|', $small_words);
        $apostrophe_rx = '(?x: [\'’] [[:lower:]]* )?';

        $str = \trim($str);

        // an input without any lower-case character is lower-cased first,
        // otherwise every word would count as "word with internal caps"
        if (self::has_lowercase($str) === false) {
            $str = self::strtolower($str, $encoding);
        }

        /**
         * Decide per matched word how it is written.
         *
         * @param string[] $matches
         *
         * @return string
         */
        $rewrite_word = static function (array $matches) use ($encoding): string {
            if ($matches[2]) {
                // preserve URLs, domains, emails and file paths
                $word = $matches[2];
            } elseif ($matches[3]) {
                // lower-case small words
                $word = self::strtolower($matches[3], $encoding);
            } elseif ($matches[4]) {
                // capitalize word w/o internal caps
                $word = static::ucfirst($matches[4], $encoding);
            } else {
                // preserve other kinds of word (iPhone)
                $word = $matches[5];
            }

            // leading and trailing underscores are kept as they are
            return $matches[1] . $word . $matches[6];
        };

        /**
         * Upper-case the first char of capture group 1.
         *
         * @param string[] $matches
         *
         * @return string
         */
        $capitalize_first_group = static function (array $matches) use ($encoding): string {
            return static::ucfirst($matches[1], $encoding);
        };

        /**
         * Keep capture group 1 and upper-case the first char of capture group 2.
         *
         * @param string[] $matches
         *
         * @return string
         */
        $capitalize_second_group = static function (array $matches) use ($encoding): string {
            return $matches[1] . static::ucfirst($matches[2], $encoding);
        };

        // the main substitutions
        $str = (string) \preg_replace_callback(
            '~\\b (_*) (?:                                                         # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_rx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $small_words_rx . ' ) ' . $apostrophe_rx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
            $rewrite_word,
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $small_words_rx . ' ) \\b # ...followed by small word
                     ~uxi',
            $capitalize_second_group,
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\\b ( ' . $small_words_rx . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
            $capitalize_first_group,
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $small_words_rx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            $capitalize_first_group,
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        return (string) \preg_replace_callback(
            '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $small_words_rx . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
            $capitalize_second_group,
            $str
        );
    }
8032
8033
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of the input are read as one big-endian number and written in
     * base 2 without leading zeros (an empty input or only NUL-bytes give "0").
     *
     * The conversion is done per hex digit, so it stays exact for inputs of any
     * length. (A single "base_convert()" call works on floats internally and
     * silently loses the low-order bits of longer inputs.)
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      <p>false on error</p>
     */
    public static function str_to_binary(string $str)
    {
        /** @var array|false $unpacked - needed for PhpStan (stubs error) */
        $unpacked = \unpack('H*', $str);
        if ($unpacked === false) {
            return false;
        }

        /** @noinspection OffsetOperationsInspection */
        $hex = (string) $unpacked[1];

        // every hex digit maps to exactly four bits
        $bits = '';
        $hex_length = \strlen($hex);
        for ($i = 0; $i < $hex_length; ++$i) {
            $bits .= \sprintf('%04b', \hexdec($hex[$i]));
        }

        // same output format as "base_convert()": no leading zeros, but at least "0"
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
8052
8053
    /**
     * Split a string into an array of lines.
     *
     * One or two consecutive "\r" / "\n" characters are treated as a single
     * line break.
     *
     * @param string   $str
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
    {
        // result for "nothing to split" and for a failed split
        $fallback = $remove_empty_values === true ? [] : [''];

        if ($str === '') {
            return $fallback;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split("[\r\n]{1,2}", $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        if ($lines === false) {
            return $fallback;
        }

        // only post-process the lines if at least one filter is requested
        if ($remove_short_values !== null || $remove_empty_values !== false) {
            return self::reduce_string_array(
                $lines,
                $remove_empty_values,
                $remove_short_values
            );
        }

        return $lines;
    }
8091
8092
    /**
     * Convert a string into an array of words.
     *
     * The string is split with the words kept as delimiters, so without any
     * filter the result alternates between "non-word" parts (even index) and
     * words (odd index).
     *
     * @param string   $str
     * @param string   $char_list           <p>Additional chars for the definition of "words".</p>
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $char_list = '',
        bool $remove_empty_values = false,
        int $remove_short_values = null
    ): array {
        $fallback = $remove_empty_values === true ? [] : [''];

        if ($str === '') {
            return $fallback;
        }

        $char_list = self::rxClass($char_list, '\pL');

        $parts = \preg_split("/({$char_list}+(?:[\p{Pd}’']{$char_list}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $fallback;
        }

        $no_filter_requested = $remove_short_values === null && $remove_empty_values === false;
        if ($no_filter_requested) {
            return $parts;
        }

        $reduced = self::reduce_string_array(
            $parts,
            $remove_empty_values,
            $remove_short_values
        );

        // make sure that every value is a string (the keys are kept)
        return \array_map(
            static function ($item): string {
                return (string) $item;
            },
            $reduced
        );
    }
8139
8140
    /**
     * alias for "UTF8::to_ascii()"
     *
     * All arguments are passed through unchanged.
     *
     * @param string $str
     * @param string $unknown
     * @param bool   $strict
     *
     * @return string
     *
     * @see UTF8::to_ascii()
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
     */
    public static function str_transliterate(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
8159
8160
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If $substring alone is already as long as (or longer than) $length, nothing
     * of $str is kept and only $substring is returned, the same result
     * "UTF8::str_truncate_safe()" gives in that situation.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                // Reserve room for the substring, but never go below zero: a negative
                // length would make "mb_substr()" cut from the end of the string and
                // the result would become longer than requested.
                $length = \max(0, $length - (int) \mb_strlen($substring));
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length) . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            // same clamping as in the UTF-8 fast path above
            $length = \max(0, $length - (int) self::strlen($substring, $encoding));
        }

        return (
               (string) self::substr(
                   $str,
                   0,
                   $length,
                   $encoding
               )
               ) . $substring;
    }
8218
8219
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * If $str is empty, $length is not positive or no room is left after
     * reserving space for $substring, only $substring is returned.
     *
     * @param string $str
     * @param int    $length                                 <p>Desired length of the truncated string.</p>
     * @param string $substring                              [optional] <p>The substring to append if it can fit. Default:
     *                                                       ''</p>
     * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>Default: false</p>
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignore_do_not_split_words_for_one_word = false
    ): string {
        if ($str === '' || $length <= 0) {
            return $substring;
        }

        $may_cut_single_word = $ignore_do_not_split_words_for_one_word === false;

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            if ($length >= (int) self::strlen($str, $encoding)) {
                return $str;
            }

            // reserve the room that is needed to append the substring
            $length -= (int) self::strlen($substring, $encoding);
            if ($length <= 0) {
                return $substring;
            }

            $truncated = self::substr($str, 0, $length, $encoding);
            if ($truncated === false) {
                return '';
            }

            // a space directly behind the cut means that no word was split
            $space_position = self::strpos($str, ' ', $length - 1, $encoding);
            if ($space_position === $length) {
                return $truncated . $substring;
            }

            // otherwise go back to the last space within the truncated part
            $last_position = self::strrpos($truncated, ' ', 0, $encoding);
            $has_earlier_space = $last_position !== false;
            if ($has_earlier_space || ($space_position !== false && $may_cut_single_word)) {
                $truncated = (string) self::substr($truncated, 0, (int) $last_position, $encoding);
            }

            return $truncated . $substring;
        }

        if ($length >= (int) \mb_strlen($str)) {
            return $str;
        }

        // reserve the room that is needed to append the substring
        $length -= (int) \mb_strlen($substring);
        if ($length <= 0) {
            return $substring;
        }

        /** @var false|string $truncated - needed for PhpStan (stubs error) */
        $truncated = \mb_substr($str, 0, $length);
        if ($truncated === false) {
            return '';
        }

        // a space directly behind the cut means that no word was split
        $space_position = \mb_strpos($str, ' ', $length - 1);
        if ($space_position === $length) {
            return $truncated . $substring;
        }

        // otherwise go back to the last space within the truncated part
        $last_position = \mb_strrpos($truncated, ' ', 0);
        $has_earlier_space = $last_position !== false;
        if ($has_earlier_space || ($space_position !== false && $may_cut_single_word)) {
            $truncated = (string) \mb_substr($truncated, 0, (int) $last_position);
        }

        return $truncated . $substring;
    }
8314
8315
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * This is "UTF8::str_delimit()" with "_" as delimiter.
     *
     * @param string $str
     *
     * @return string
     *                <p>The underscored string.</p>
     */
    public static function str_underscored(string $str): string
    {
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
8330
8331
    /**
     * Returns an UpperCamelCase version of the supplied string. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * The string is camelized first and the first character of that result is
     * upper-cased afterwards.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>A string in UpperCamelCase.</p>
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // only $str and $encoding are handed to the camelize step
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst(
            $camelized,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }
8354
8355
    /**
     * alias for "UTF8::ucfirst()"
     *
     * All arguments are passed through unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @return string
     *
     * @see UTF8::ucfirst()
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
     */
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        return self::ucfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
    }
8384
8385
    /**
     * Get the number of words in a specific string.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $format    [optional] <p>
     *                          <strong>0</strong> => return a number of words (default)<br>
     *                          <strong>1</strong> => return an array of words<br>
     *                          <strong>2</strong> => return an array of words with word-offset as key
     *                          </p>
     * @param string $char_list [optional] <p>Additional chars that contains to words and do not start a new word.</p>
     *
     * @return int|string[] The number of words in the string
     */
    public static function str_word_count(string $str, int $format = 0, string $char_list = '')
    {
        // the parts alternate: "non-word" (even index), word (odd index), ...
        $parts = self::str_to_words($str, $char_list);
        $parts_count = \count($parts);

        // every format other than 1 and 2 only counts the words
        if ($format !== 1 && $format !== 2) {
            return (int) (($parts_count - 1) / 2);
        }

        $words = [];

        if ($format === 1) {
            for ($index = 1; $index < $parts_count; $index += 2) {
                $words[] = $parts[$index];
            }

            return $words;
        }

        // format 2: the key is the offset of the word in the input
        $offset = (int) self::strlen($parts[0]);
        for ($index = 1; $index < $parts_count; $index += 2) {
            $words[$offset] = $parts[$index];

            // skip the word itself and the "non-word" part behind it
            $offset += (int) self::strlen($parts[$index]) + (int) self::strlen($parts[$index + 1]);
        }

        return $words;
    }
8422
8423
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp()
     *
     * Both strings are case-folded via "UTF8::strtocasefold()" and the results
     * are compared with "UTF8::strcmp()".
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): int {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
8461
8462
    /**
     * alias for "UTF8::strstr()"
     *
     * All arguments are passed through unchanged.
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $clean_utf8
     *
     * @return false|string
     *
     * @see UTF8::strstr()
     * @deprecated <p>please use "UTF8::strstr()"</p>
     */
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        return self::strstr($haystack, $needle, $before_needle, $encoding, $clean_utf8);
    }
8491
8492
    /**
     * Case-sensitive string comparison.
     *
     * Both strings are brought into Unicode normalization form D before they
     * are compared byte-wise, so that canonically equivalent strings compare
     * as equal. A string that can not be normalized (e.g. invalid UTF-8) is
     * compared as it is.
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        // identical bytes need no normalization at all
        if ($str1 === $str2) {
            return 0;
        }

        // "Normalizer::normalize()" returns false on failure, which must not
        // be handed over to "\strcmp()" -> fall back to the raw input
        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        if ($normalized1 === false) {
            $normalized1 = $str1;
        }

        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);
        if ($normalized2 === false) {
            $normalized2 = $str2;
        }

        return \strcmp($normalized1, $normalized2);
    }
8514
8515
    /**
     * Find length of initial segment not matching mask.
     *
     * If $offset and / or $length is given, only that part of the string is
     * inspected.
     *
     * @param string $str
     * @param string $char_list
     * @param int    $offset
     * @param int    $length
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     */
    public static function strcspn(
        string $str,
        string $char_list,
        int $offset = null,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // without a mask the whole string is the "not matching" segment
        if ($char_list === '') {
            return (int) self::strlen($str, $encoding);
        }

        $needs_slice = $offset !== null || $length !== null;
        if ($needs_slice) {
            $start = (int) $offset;

            if ($encoding !== 'UTF-8') {
                /** @noinspection UnnecessaryCastingInspection */
                $slice = self::substr($str, $start, $length, $encoding);
            } elseif ($length === null) {
                /** @noinspection UnnecessaryCastingInspection */
                $slice = \mb_substr($str, $start);
            } else {
                /** @noinspection UnnecessaryCastingInspection */
                $slice = \mb_substr($str, $start, $length);
            }

            if ($slice === false) {
                return 0;
            }

            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $str = $slice;
        }

        if ($str === '') {
            return 0;
        }

        // lazily capture everything in front of the first char from the mask
        $found = [];
        if (!\preg_match('/^(.*?)' . self::rxClass($char_list) . '/us', $str, $found)) {
            // no char from the mask in the string at all
            return (int) self::strlen($str, $encoding);
        }

        $segment_length = self::strlen($found[1], $encoding);

        return $segment_length === false ? 0 : $segment_length;
    }
8579
8580
    /**
     * alias for "UTF8::stristr()"
     *
     * All arguments are passed through unchanged.
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $clean_utf8
     *
     * @return false|string
     *
     * @see UTF8::stristr()
     * @deprecated <p>please use "UTF8::stristr()"</p>
     */
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        return self::stristr($haystack, $needle, $before_needle, $encoding, $clean_utf8);
    }
8609
8610
    /**
     * Create a UTF-8 string from code points.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * Every value is cast to int, written as a numeric HTML entity and the
     * concatenated entities are decoded afterwards.
     *
     * @param array $array <p>Integer or Hexadecimal codepoints.</p>
     *
     * @return string
     *                <p>A UTF-8 encoded string.</p>
     */
    public static function string(array $array): string
    {
        if ($array === []) {
            return '';
        }

        $entities = \array_map(
            static function ($code_point): string {
                return '&#' . (int) $code_point . ';';
            },
            $array
        );

        return self::html_entity_decode(\implode('', $entities), \ENT_QUOTES | \ENT_HTML5);
    }
8633
8634
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * The string is checked against every key of the BOM table of this class.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     */
    public static function string_has_bom(string $str): bool
    {
        // Iterate by value: nothing is modified here, and a by-reference loop
        // would leave a dangling reference into the shared static BOM table.
        /** @noinspection PhpUnusedLocalVariableInspection */
        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            if (\strpos($str, $bom_string) === 0) {
                return true;
            }
        }

        return false;
    }
8654
8655
    /**
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string $str            <p>
     *                               The input string.
     *                               </p>
     * @param string $allowable_tags [optional] <p>
     *                               You can use the optional second parameter to specify tags which should
     *                               not be stripped.
     *                               </p>
     *                               <p>
     *                               HTML comments and PHP tags are also stripped. This is hardcoded and
     *                               can not be changed with allowable_tags.
     *                               </p>
     * @param bool   $clean_utf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     *                <p>The stripped string.</p>
     */
    public static function strip_tags(
        string $str,
        string $allowable_tags = null,
        bool $clean_utf8 = false
    ): string {
        if ($str === '') {
            return '';
        }

        // the cleaning is optional and has to happen before the tags are stripped
        $input = $clean_utf8 === true ? self::clean($str) : $str;

        // the native function is only called with a second argument if one was given
        return $allowable_tags === null
            ? \strip_tags($input)
            : \strip_tags($input, $allowable_tags);
    }
8695
8696
    /**
     * Remove every whitespace character from the string. This includes tabs
     * and newline characters, as well as multibyte whitespace such as the
     * thin space and ideographic space.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *                <p>The string without whitespace (empty string for empty input).</p>
     */
    public static function strip_whitespace(string $str): string
    {
        return $str === ''
            ? ''
            : (string) \preg_replace('/[[:space:]]+/u', '', $str);
    }
8713
8714
    /**
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
     *
     * Strategy, in order: mbstring, intl (UTF-8 and non-negative offset only),
     * native stripos() for pure ASCII input, and finally case-folding both
     * strings and delegating to UTF8::strpos().
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle     <p>The string to find in haystack.</p>
     * @param int    $offset     [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     *                   (also for an empty haystack or needle)
     */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stripos($haystack, $needle, $offset);
            }

            return \mb_stripos(
                $haystack,
                $needle,
                $offset,
                self::normalize_encoding($encoding, 'UTF-8')
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        //
        // fallback via intl
        //

        // INFO: "grapheme_stripos()" can't handle other encodings or a negative offset
        $intl_usable = $encoding === 'UTF-8'
                       && $offset >= 0
                       && self::$SUPPORT['intl'] === true;
        if ($intl_usable) {
            $grapheme_position = \grapheme_stripos($haystack, $needle, $offset);
            if ($grapheme_position !== false) {
                return $grapheme_position;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: fold case on both sides, then search case-sensitively
        //

        return self::strpos(
            self::strtocasefold($haystack, true, false, $encoding, null, false),
            self::strtocasefold($needle, true, false, $encoding, null, false),
            $offset,
            $encoding
        );
    }
8789
8790
    /**
     * Returns all of haystack starting from and including the first occurrence of needle to the end
     * (case-insensitive).
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found
     *                      (also for an empty haystack or needle).</p>
     */
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // The needle can only be empty here if cleaning removed all of it.
        // Compare strictly: a loose "!$needle" check is also true for the
        // needle "0", which must be searched for like any other string.
        if ($needle === '') {
            return $haystack;
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php: capture everything in front of the first
        // case-insensitive match of the (quoted) needle
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // skip as many characters as were found in front of the needle
        return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
    }
8874
8875
    /**
     * Get the string length, not the byte-length!
     *
     * Strategy, in order: mbstring, native strlen() for "CP850" / "ASCII",
     * iconv, intl (UTF-8 only), native strlen() for pure ASCII input and
     * finally counting the matches of a UTF-8 aware regular expression.
     *
     * @see http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str        <p>The string being checked for length.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   <p>
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
     *                   $encoding.
     *                   (One multi-byte character counted as +1).
     *                   <br>
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
     *                   chars.
     *                   </p>
     */
    public static function strlen(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($str === '') {
            return 0;
        }

        // "UTF-8" and "CP850" are used as-is, everything else gets normalized
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strlen($str)
                : \mb_strlen($str, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'ASCII' || $encoding === 'CP850') {
            return \strlen($str);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
            &&
            self::$SUPPORT['iconv'] === false
        ) {
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconv_length = \iconv_strlen($str, $encoding);
            if ($iconv_length !== false) {
                return $iconv_length;
            }
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strlen()" can't handle other encodings
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $grapheme_length = \grapheme_strlen($str);
            if ($grapheme_length !== null) {
                return $grapheme_length;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php: one regex match per character
        //

        \preg_match_all('/./us', $str, $parts);
        $char_count = \count($parts[0]);

        // no match at all for a non-empty string is reported as failure
        return $char_count === 0 ? false : $char_count;
    }
8994
8995
    /**
     * Get string length in byte.
     *
     * When self::$SUPPORT['mbstring_func_overload'] is set, mb_strlen() with
     * the 8-bit "CP850" encoding is used; otherwise the native strlen().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return int
     *             <p>The number of bytes (0 for an empty string).</p>
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // "mb_" is available if overload is used, so use it ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850') // 8-BIT
            : \strlen($str);
    }
9015
9016
    /**
     * Case-insensitive string comparisons using a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcasecmp()
     *
     * Both strings are passed through UTF8::strtocasefold() and then compared
     * with UTF8::strnatcmp().
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded_str1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_str2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded_str1, $folded_str2);
    }
9037
9038
    /**
     * String comparisons using a "natural order" algorithm
     *
     * INFO: natural order version of UTF8::strcmp()
     *
     * Identical strings short-circuit to 0; otherwise both strings are passed
     * through UTF8::strtonatfold() and compared with the native strnatcmp().
     *
     * @see http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        $folded_str1 = (string) self::strtonatfold($str1);
        $folded_str2 = (string) self::strtonatfold($str2);

        return \strnatcmp($folded_str1, $folded_str2);
    }
9064
9065
    /**
     * Case-insensitive string comparison of the first n characters.
     *
     * Both strings are case-folded via UTF8::strtocasefold() and then compared
     * with UTF8::strncmp(). The encoding is forwarded to both steps, so the
     * first $len characters are counted in the same encoding that was used for
     * the case-folding.
     *
     * @see http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        return self::strncmp(
            self::strtocasefold($str1, true, false, $encoding, null, false),
            self::strtocasefold($str2, true, false, $encoding, null, false),
            $len,
            $encoding // without this, strncmp() would always truncate as UTF-8
        );
    }
9092
9093
    /**
     * String comparison of the first n characters.
     *
     * Both strings are cut down to their first $len characters (mb_substr()
     * for UTF-8, UTF8::substr() for every other encoding) and the results are
     * compared with UTF8::strcmp().
     *
     * @see http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // "UTF-8" and "CP850" are used as-is, everything else gets normalized
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($encoding !== 'UTF-8') {
            return self::strcmp(
                (string) self::substr($str1, 0, $len, $encoding),
                (string) self::substr($str2, 0, $len, $encoding)
            );
        }

        return self::strcmp(
            (string) \mb_substr($str1, 0, $len),
            (string) \mb_substr($str2, 0, $len)
        );
    }
9128
9129
    /**
     * Search a string for any of a set of characters.
     *
     * The character list is turned into a regex character class via
     * UTF8::rxClass(); the haystack is returned from the first matched
     * character onwards.
     *
     * @see http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>This parameter is case-sensitive.</p>
     *
     * @return false|string string starting from the character found, or false if it is not found
     *                      (also for an empty haystack or char_list)
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($char_list === '' || $haystack === '') {
            return false;
        }

        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $match)) {
            return false;
        }

        // byte position of the matched character inside the haystack
        $byte_offset = (int) \strpos($haystack, $match[0]);

        return \substr($haystack, $byte_offset);
    }
9151
9152
    /**
     * Find the position of the first occurrence of a substring in a string.
     *
     * Strategy, in order: mbstring, native strpos() for "CP850" / "ASCII",
     * intl (UTF-8 and non-negative offset only), iconv (non-negative offset
     * only), native strpos() for pure ASCII input and finally a byte-wise
     * search on the haystack sliced at $offset, converted back to a character
     * position.
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>The search offset. If it is not specified, 0 is used.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string.<br> If needle is not found it returns false
     *                   (also for an empty haystack or needle).
     */
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strpos($haystack, $needle, $offset);
            }

            return \mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //
        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $return_tmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        if ($offset < 0) {
            // A negative offset counts from the end of the haystack. Convert it
            // into the equivalent absolute character offset first, so that the
            // returned position stays relative to the start of the original
            // haystack and not to the start of the slice searched below.
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystack_tmp === false) {
            $haystack_tmp = '';
        }
        $haystack_tail = (string) $haystack_tmp;

        // byte position of the needle inside the slice
        $pos = \strpos($haystack_tail, $needle);
        if ($pos === false) {
            return false;
        }

        if ($pos) {
            // convert the byte position into a character count and add the skipped characters
            return $offset + (int) self::strlen(\substr($haystack_tail, 0, $pos), $encoding);
        }

        return $offset;
    }
9299
9300
    /**
     * Find the byte position of the first occurrence of a substring in a string.
     *
     * When self::$SUPPORT['mbstring_func_overload'] is set, mb_strpos() with
     * the 8-bit "CP850" encoding is used; otherwise the native strpos().
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @return false|int The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found, or haystack or needle
     *                   is empty, it returns false.
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strpos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strpos($haystack, $needle, $offset);
    }
9329
9330
    /**
     * Find the last occurrence of a character in a string within another.
     *
     * Strategy, in order: mbstring, native strrchr() for "CP850" / "ASCII"
     * (only when $before_needle is false), and otherwise a position lookup
     * (iconv_strrpos() when iconv is available, else UTF8::strrpos()) followed
     * by UTF8::substr(). The position-based fallbacks only use the first
     * character of the needle.
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or false if needle is not found
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "UTF-8" and "CP850" are used as-is, everything else gets normalized
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        $is_single_byte_encoding = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($before_needle === false && $is_single_byte_encoding) {
            return \strrchr($haystack, $needle);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv or vanilla php: both search for the first
        // character of the needle only and slice the haystack at its position
        //

        $first_needle_char = self::substr($needle, 0, 1, $encoding);
        if ($first_needle_char === false) {
            return false;
        }
        $needle = (string) $first_needle_char;

        $pos = self::$SUPPORT['iconv'] === true
            ? \iconv_strrpos($haystack, $needle, $encoding)
            : self::strrpos($haystack, $needle, 0, $encoding);
        if ($pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $pos, $encoding)
            : self::substr($haystack, $pos, null, $encoding);
    }
9452
9453
    /**
     * Reverses characters order in the string.
     *
     * The string is passed through UTF8::emoji_encode() before and
     * UTF8::emoji_decode() after the reversal. For UTF-8 the reversal walks the
     * string backwards with the "grapheme_" functions when intl is available,
     * otherwise with the "mb_" functions; other encodings use UTF8::strlen()
     * and UTF8::substr().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the string with characters in the reverse sequence
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::emoji_encode($str, true);

        // init
        $reversed = '';

        if ($encoding === 'UTF-8') {
            if (self::$SUPPORT['intl'] === true) {
                // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
                for ($index = (int) \grapheme_strlen($str) - 1; $index >= 0; --$index) {
                    $piece = \grapheme_substr($str, $index, 1);
                    if ($piece !== false) {
                        $reversed .= $piece;
                    }
                }
            } else {
                for ($index = (int) \mb_strlen($str) - 1; $index >= 0; --$index) {
                    $piece = \mb_substr($str, $index, 1);
                    if ($piece !== false) {
                        $reversed .= $piece;
                    }
                }
            }
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            for ($index = (int) self::strlen($str, $encoding) - 1; $index >= 0; --$index) {
                $piece = self::substr($str, $index, 1, $encoding);
                if ($piece !== false) {
                    $reversed .= $piece;
                }
            }
        }

        return self::emoji_decode($reversed, true);
    }
9505
9506
    /**
     * Case-insensitive search for the last occurrence of $needle in $haystack,
     * returning the part of $haystack before or from that occurrence.
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              <b>true</b>: return everything from the beginning of haystack
     *                              up to the last occurrence of needle.<br>
     *                              <b>false</b>: return everything from the last occurrence of needle
     *                              to the end of haystack.
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or<br>false if needle is not found
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // preferred path: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php (only the first character of $needle is searched for)
        //

        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }

        $last_pos = self::strripos($haystack, (string) $first_char, 0, $encoding);
        if ($last_pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
9581
9582
    /**
     * Case-insensitive lookup of the position of the last occurrence of $needle in $haystack.
     *
     * Strategy, in order: mbstring; plain strripos() for "CP850" / "ASCII"; intl (grapheme)
     * for UTF-8 with a non-negative offset; plain strripos() when haystack and needle are
     * pure ASCII; finally case-folding both strings and delegating to UTF8::strrpos().
     *
     * @param string     $haystack   <p>The string to look in.</p>
     * @param int|string $needle     <p>The string to look for. A non-negative int is converted via UTF8::chr().</p>
     * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        $needle = ((int) $needle === $needle && $needle >= 0)
            ? (string) self::chr($needle)
            : (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $is_utf8 = $encoding === 'UTF-8';

        //
        // preferred path: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $is_utf8
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strripos($haystack, $needle, $offset);
        }

        if (!$is_utf8 && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //
        // INFO: "grapheme_strripos()" can't handle other encodings or a negative offset
        //

        if ($is_utf8 && $offset >= 0 && self::$SUPPORT['intl'] === true) {
            $grapheme_pos = \grapheme_strripos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: fold the case of both strings, then search case-sensitively
        //

        return self::strrpos(
            self::strtocasefold($haystack, true, false, $encoding),
            self::strtocasefold($needle, true, false, $encoding),
            $offset,
            $encoding,
            $clean_utf8
        );
    }
9692
9693
    /**
     * Byte-wise, case-insensitive lookup of the position of the last occurrence of a string within another.
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @return false|int
     *                   <p>Return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found.</p>
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strripos($haystack, $needle, $offset, 'CP850')
            : \strripos($haystack, $needle, $offset);
    }
9725
9726
    /**
     * Case-sensitive lookup of the position of the last occurrence of $needle in $haystack.
     *
     * Strategy, in order: mbstring; plain strrpos() for "CP850" / "ASCII"; intl (grapheme)
     * for UTF-8 with a non-negative offset; plain strrpos() when haystack and needle are
     * pure ASCII; finally a byte search whose result is converted into a character position.
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack   <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                               into the string. Negative values will stop searching at an arbitrary point prior to
     *                               the end of the string.
     *                               </p>
     * @param string     $encoding   [optional] <p>Set the charset.</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        $needle = ((int) $needle === $needle && $needle >= 0)
            ? (string) self::chr($needle)
            : (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $is_utf8 = $encoding === 'UTF-8';

        //
        // preferred path: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $is_utf8
                ? \mb_strrpos($haystack, $needle, $offset)
                : \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strrpos($haystack, $needle, $offset);
        }

        if (!$is_utf8 && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //
        // INFO: "grapheme_strrpos()" can't handle a negative offset or other encodings
        //

        if ($offset >= 0 && $is_utf8 && self::$SUPPORT['intl'] === true) {
            $grapheme_pos = \grapheme_strrpos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // cut the haystack down to the part that has to be searched
        $sliced = null;
        if ($offset > 0) {
            $sliced = self::substr($haystack, $offset);
        } elseif ($offset < 0) {
            $sliced = self::substr($haystack, 0, $offset);
            // the slice starts at the beginning, so nothing has to be added to the result
            $offset = 0;
        }

        if ($sliced !== null) {
            $haystack = $sliced === false ? '' : (string) $sliced;
        }

        $byte_pos = \strrpos($haystack, $needle);
        if ($byte_pos === false) {
            return false;
        }

        /** @var false|string $prefix - needed for PhpStan (stubs error) */
        $prefix = \substr($haystack, 0, $byte_pos);
        if ($prefix === false) {
            return false;
        }

        // character position = skipped characters + length of everything in front of the match
        return $offset + (int) self::strlen($prefix);
    }
9864
9865
    /**
     * Byte-wise lookup of the position of the last occurrence of a substring in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *                         </p>
     *
     * @return false|int
     *                   <p>The numeric position of the last occurrence of needle in the
     *                   haystack string. If needle is not found, it returns false.</p>
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
            : \strrpos($haystack, $needle, $offset);
    }
9897
9898
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
     *
     * When $offset and / or $length are given, only that sub-string of $str is inspected.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $mask     <p>The mask of chars</p>
     * @param int    $offset   [optional]
     * @param int    $length   [optional]
     * @param string $encoding [optional] <p>Set the charset.</p>
     *
     * @return false|int
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // narrow the input down to the requested window first
        if ($offset !== 0 || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $str = (string) \mb_substr($str, $offset, $length);
            } else {
                $str = (string) \mb_substr($str, $offset);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        $found = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
9941
9942
    /**
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
     *
     * Strategy, in order: mbstring; plain strstr() for "CP850" / "ASCII"; intl (grapheme) for
     * UTF-8; plain strstr() when haystack and needle are pure ASCII; finally a regex based lookup.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, strstr() returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      A sub-string,<br>or <strong>false</strong> if needle is not found
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $is_utf8 = $encoding === 'UTF-8';

        //
        // preferred path: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $is_utf8
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        if (!$is_utf8 && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //
        // INFO: "grapheme_strstr()" can't handle other encodings
        //

        if ($is_utf8 && self::$SUPPORT['intl'] === true) {
            $grapheme_part = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($grapheme_part !== false) {
                return $grapheme_part;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php: capture everything in front of the first needle
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $found);

        if (!isset($found[1])) {
            return false;
        }

        $head = $found[1];

        return $before_needle
            ? $head
            : self::substr($haystack, (int) self::strlen($head));
    }
10050
10051
    /**
     * Byte-wise lookup of the first occurrence of a string within another.
     *
     * @param string $haystack      <p>
     *                              The string from which to get the first occurrence
     *                              of needle.
     *                              </p>
     * @param string $needle        <p>
     *                              The string to find in haystack.
     *                              </p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the first occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the first occurrence of needle to the end,
     *                              </p>
     *
     * @return false|string
     *                      <p>The portion of haystack,
     *                      or false if needle is not found.</p>
     */
    public static function strstr_in_byte(
        string $haystack,
        string $needle,
        bool $before_needle = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
            : \strstr($haystack, $needle, $before_needle);
    }
10090
10091
    /**
     * Unicode transformation for case-less matching.
     *
     * The string is first run through the internal case-fix helper and is then converted
     * to lower- or uppercase, depending on $lower.
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str        <p>The input string.</p>
     * @param bool        $full       [optional] <p>
     *                                <b>true</b>, replace full case folding chars (default)<br>
     *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                                </p>
     * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding   [optional] <p>Set the charset.</p>
     * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower      [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                                is for some languages better ...</p>
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $clean_utf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            // drop invalid byte sequences before the case of the string is touched
            $str = self::clean($str);
        }

        $str = self::fixStrCaseHelper($str, $lower, $full);

        $to_lower = $lower === true;

        // plain UTF-8 without a language: the "mb_" functions can be used directly
        if ($lang === null && $encoding === 'UTF-8') {
            return $to_lower ? \mb_strtolower($str) : \mb_strtoupper($str);
        }

        return $to_lower
            ? self::strtolower($str, $encoding, false, $lang)
            : self::strtoupper($str, $encoding, false, $lang);
    }
10143
10144
    /**
     * Make a string lowercase.
     *
     * With a $lang the conversion is done by the intl transliterator ("<lang>-Lower", or
     * "Any-Lower" when that id is not in the known transliterator list); without intl a
     * warning is triggered and "mb_strtolower()" is used instead.
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                           <p>The string being lowercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $str = (string) $str;
        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            // drop invalid byte sequences before the case of the string is touched
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length === true) {
            $str = self::fixStrCaseHelper($str, true);
        }

        $has_lang = $lang !== null;

        // plain UTF-8 without a language: no charset handling needed
        if (!$has_lang && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (!$has_lang) {
            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        if (self::$SUPPORT['intl'] !== true) {
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        // lazy-load the list of known transliterator ids
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $transliterator_id = $lang . '-Lower';
        if (!\in_array($transliterator_id, self::$INTL_TRANSLITERATOR_LIST, true)) {
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

            // unknown language: use the generic transliterator
            $transliterator_id = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        return (string) \transliterator_transliterate($transliterator_id, $str);
    }
10213
10214
    /**
     * Make a string uppercase.
     *
     * With a $lang the conversion is done by the intl transliterator ("<lang>-Upper", or
     * "Any-Upper" when that id is not in the known transliterator list); without intl a
     * warning is triggered and "mb_strtoupper()" is used instead.
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                           <p>The string being uppercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length
     *                                                   (the string is pre-processed by the internal case-fix helper)</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            // drop invalid byte sequences before the case of the string is touched
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length === true) {
            $str = self::fixStrCaseHelper($str, false);
        }

        // plain UTF-8 without a language: no charset handling needed
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // lazy-load the list of known transliterator ids
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Upper';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl is available here, only the requested language is unknown
                    // (message aligned with UTF8::strtolower())
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    // unknown language: use the generic transliterator
                    $language_code = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($language_code, $str);
            }

            // fixed: the warning used to name UTF8::strtolower()
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
10283
10284
    /**
     * Translate characters or replace sub-strings.
     *
     * @see http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The characters to replace, or (with an empty "to") either a sub-string
     *                              to remove or an array of replacement pairs.</p>
     * @param string|string[] $to   [optional] <p>The characters being translated to.</p>
     *
     * @throws \InvalidArgumentException if "from" and "to" cannot be combined into a replacement map
     *
     * @return string
     *                <p>A copy of str in which every character of "from" is replaced by the character at the
     *                same position in "to"; the longer of both lists is cut down to the shorter one.<br>
     *                With an empty "to": a string "from" is removed from str, an array "from" is passed to
     *                "strtr()" as replacement pairs.</p>
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // identical lists would map every character onto itself
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            $from_chars = self::str_split($from);
            $to_chars = self::str_split($to);
            $count_from = \count($from_chars);
            $count_to = \count($to_chars);

            // only pair up as many characters as both sides provide
            if ($count_from > $count_to) {
                $from_chars = \array_slice($from_chars, 0, $count_to);
            } elseif ($count_from < $count_to) {
                $to_chars = \array_slice($to_chars, 0, $count_from);
            }

            $pairs = \array_combine($from_chars, $to_chars);
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            if ($pairs === false) {
                // report the lists that failed to combine, not the "false" result itself
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from_chars, true) . ' | to: ' . \print_r($to_chars, true) . ')');
            }

            $from = $pairs;
        }

        // empty "to" + string "from": remove the sub-string
        if (\is_string($from)) {
            return \str_replace($from, '', $str);
        }

        return \strtr($str, $from);
    }
10332
10333
    /**
     * Calculate the display width of a string.
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return int
     *             <p>0 for an empty string, otherwise the result of "mb_strwidth()" or, without mbstring,
     *             the character count in which every character of the listed wide ranges counts twice.</p>
     */
    public static function strwidth(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): int {
        if ($str === '') {
            return 0;
        }

        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            $str = self::clean($str);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        //
        // fallback via vanilla php
        //

        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip all wide characters and remember how many were removed
        $wide_count = 0;
        $narrow_only = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wide_count);

        // a wide character occupies two columns, everything else one
        return (int) self::strlen($narrow_only, 'UTF-8') + ($wide_count * 2);
    }
10386
10387
    /**
     * Get part of a string.
     *
     * @see http://php.net/manual/en/function.mb-substr.php
     *
     * @param string   $str        <p>The string being checked.</p>
     * @param int      $offset     <p>The first position used in str.</p>
     * @param int|null $length     [optional] <p>The maximum length of the returned string.</p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters. An empty string is returned for an empty input or a
     *                      length of 0.</p><p><b>FALSE</b> is returned by the non-mbstring fallback if the
     *                      length of <i>str</i> cannot be determined.</p>
     */
    public static function substr(
        string $str,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        if ($clean_utf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // whole string
        if ($offset === 0 && $length === null) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    return \mb_substr($str, $offset);
                }

                return \mb_substr($str, $offset, $length);
            }

            // delegate to mbstring directly: calling self::substr() with the
            // same arguments again would recurse without end
            return \mb_substr($str, $offset, $length, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            if ($length === null) {
                return \substr($str, $offset);
            }

            return \substr($str, $offset, $length);
        }

        // otherwise we need the string-length
        $str_length = 0;
        if ($offset !== 0 || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // empty string ("$length === 0" was already handled above, so only null is left to check)
        if ($offset === $str_length && $length === null) {
            return '';
        }

        // impossible
        if ($offset !== 0 && $offset > $str_length) {
            return '';
        }

        if ($length === null) {
            $length = (int) $str_length;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_substr($str, $offset, $length);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $return_tmp = \iconv_substr($str, $offset, $length);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split to array, and remove invalid characters
        $array = self::str_split($str);

        // extract relevant part, and join to make string again
        return \implode('', \array_slice($array, $offset, $length));
    }
10547
10548
    /**
     * Comparison of two strings from an offset, up to a length of characters.
     *
     * If an offset or a length is given, str1 is reduced to that portion and str2 is cut down to the
     * same number of characters before both are compared.
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. By default the rest of str1
     *                                     after the offset is used.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);
                // measure str1 in the same encoding that is used to cut both strings
                $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1, $encoding), $encoding);
            }
        }

        if ($case_insensitivity === true) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
10601
10602
    /**
     * Count how often a substring occurs in a string.
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack   <p>The string to search in.</p>
     * @param string   $needle     <p>The substring to search for.</p>
     * @param int      $offset     [optional] <p>The offset where to start counting.</p>
     * @param int|null $length     [optional] <p>The maximum length after the specified offset to search for
     *                             the substring.</p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   <p>The number of occurrences; 0 if the length is 0; <b>FALSE</b> if the haystack or
     *                   the needle is empty, or if the length of the haystack cannot be determined.</p>
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // invalid characters in front of the needle would shift the
            // positions reported by "mb_strpos()" and "iconv_strpos()"
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // narrow the haystack down to the requested window first
        $has_window = $offset !== 0 || ($length !== null && $length > 0);
        if ($has_window) {
            if ($length === null) {
                $haystack_length = self::strlen($haystack, $encoding);
                if ($haystack_length === false) {
                    return false;
                }
                $length = (int) $haystack_length;
            }

            $haystack = $encoding === 'UTF-8'
                ? (string) \mb_substr($haystack, $offset, $length)
                : (string) \mb_substr($haystack, $offset, $length, $encoding);
        }

        $has_mbstring = self::$SUPPORT['mbstring'];

        if ($encoding !== 'UTF-8' && $has_mbstring === false) {
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if ($has_mbstring === true) {
            return $encoding === 'UTF-8'
                ? \mb_substr_count($haystack, $needle)
                : \mb_substr_count($haystack, $needle, $encoding);
        }

        // no mbstring: count the matches of the quoted needle instead
        $pattern = '/' . \preg_quote($needle, '/') . '/us';
        \preg_match_all($pattern, $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
10683
10684
    /**
     * Count the number of substring occurrences, working on bytes.
     *
     * @param string   $haystack <p>The string being checked.</p>
     * @param string   $needle   <p>The string being found.</p>
     * @param int      $offset   [optional] <p>The offset where to start counting.</p>
     * @param int|null $length   [optional] <p>The maximum length after the specified offset to search for
     *                           the substring.</p>
     *
     * @return false|int
     *                   <p>The number of times the needle substring occurs in the haystack string; 0 if the
     *                   haystack or the needle is empty.</p>
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($needle === '' || $haystack === '') {
            return 0;
        }

        // without function overloading the native function already works on bytes
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return $length === null
                ? \substr_count($haystack, $needle, $offset)
                : \substr_count($haystack, $needle, $offset, $length);
        }

        // function overloading is active: cut the window out of the haystack ourselves
        if ($offset !== 0 || $length !== null) {
            if ($length === null) {
                $haystack_length = self::strlen($haystack);
                if ($haystack_length === false) {
                    return false;
                }
                $length = (int) $haystack_length;
            }

            $is_empty_window = $length !== 0
                               && $offset !== 0
                               && ($length + $offset) <= 0;
            if (
                $is_empty_window
                &&
                Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
            ) {
                return false;
            }

            /** @var false|string $window - needed for PhpStan (stubs error) */
            $window = \substr($haystack, $offset, $length);
            $haystack = $window === false ? '' : (string) $window;
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
    }
10762
10763
    /**
     * Count the occurrences of $substring in the given string.
     *
     * The comparison is case-sensitive unless $case_sensitive is set to false.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $substring      <p>The substring to search for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <p>The number of occurrences; 0 if one of both strings is empty.</p>
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $case_sensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($substring === '' || $str === '') {
            return 0;
        }

        // decided before normalization: only a literal "UTF-8" takes the short path
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($case_sensitive) {
            return $is_utf8
                ? (int) \mb_substr_count($str, $substring)
                : (int) \mb_substr_count($str, $substring, $encoding);
        }

        if ($is_utf8) {
            // compare the upper-cased variants of both strings
            $upper_str = \mb_strtoupper($str);
            $upper_substring = \mb_strtoupper($substring);

            return (int) \mb_substr_count($upper_str, $upper_substring);
        }

        // other encodings: compare the case-folded variants of both strings
        $folded_str = self::strtocasefold($str, true, false, $encoding, null, false);
        $folded_substring = self::strtocasefold($substring, true, false, $encoding, null, false);

        return (int) \mb_substr_count($folded_str, $folded_substring, $encoding);
    }
10808
10809
    /**
     * Strip a prefix ($needle) from the start of the string ($haystack), ignoring case.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it does not start
     *                with the needle.</p>
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // nothing to strip from / nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_istarts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
10833
10834
    /**
     * Get part of a string, processed in bytes.
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first position (in bytes) used in str.</p>
     * @param int|null $length [optional] <p>The maximum length (in bytes) of the returned string; by default
     *                         everything up to the end of str is returned.</p>
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters. An empty string is returned for an empty input or a
     *                      length of 0; otherwise the result of the underlying "substr()" / "mb_substr()"
     *                      call is passed through.</p>
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        // whole string
        if ($offset === 0 && $length === null) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        // no length given: omit the argument instead of passing a 32-bit
        // maximum, which would cut off strings larger than 2 GiB
        if ($length === null) {
            return \substr($str, $offset);
        }

        return \substr($str, $offset, $length);
    }
10865
10866
    /**
     * Strip a suffix ($needle) from the end of the string ($haystack), ignoring case.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it does not end
     *                with the needle.</p>
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // nothing to strip from / nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_iends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // keep everything in front of the suffix
        $keep_length = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $keep_length);
    }
10890
10891
    /**
     * Strip a prefix ($needle) from the start of the string ($haystack).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @return string
     *                <p>The haystack without the prefix, or the unchanged haystack if it does not start
     *                with the needle.</p>
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // nothing to strip from / nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_starts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
10915
10916
    /**
     * Replace text within a portion of a string.
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * If an array of strings is given, every element is processed on its own with the matching element
     * of the other parameters: a scalar replacement / offset / length is applied to every element, a
     * non-integer entry of an offset array is treated as 0, and a missing length is treated as 0 for
     * every element.
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given for a single string, the replacing
     *                                     ends at the end of string. If length is zero then this function will
     *                                     have the effect of inserting replacement into string at the given
     *                                     start offset.</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \InvalidArgumentException if "$offset" or "$length" is an array while "$str" is not
     *
     * @return string|string[] The result string is returned. If string is an array then array is returned.
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str) === true) {
            $num = \count($str);

            // the replacement
            if (\is_array($replacement) === true) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset
            if (\is_array($offset) === true) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$value_tmp) {
                    $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
                }
                unset($value_tmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length) === true) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$value_tmp_V2) {
                    $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
                }
                unset($value_tmp_V2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call: hand the encoding over as well, otherwise every
            // element would be processed with the default encoding
            return \array_map(
                static function ($str_item, $replacement_item, $offset_item, $length_item) use ($encoding) {
                    return self::substr_replace($str_item, $replacement_item, $offset_item, $length_item, $encoding);
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        // a single string can only use one replacement: take the first one
        if (\is_array($replacement) === true) {
            if ($replacement !== []) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length) === true) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset) === true) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // translate a negative offset and clamp the offset to the string
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // translate a negative length and clamp the length to the string
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // part before the offset + replacement + part behind the replaced portion
            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into characters and splice the replacement in
        \preg_match_all('/./us', $str, $str_matches);
        \preg_match_all('/./us', $replacement, $replacement_matches);

        if ($length === null) {
            $length_tmp = self::strlen($str, $encoding);
            if ($length_tmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $length_tmp;
        }

        \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);

        return \implode('', $str_matches[0]);
    }
11066
11067
    /**
     * Strips $needle from the end of $haystack, if $haystack ends with it.
     *
     * The "ends with" test is a plain byte comparison; only the final cut is
     * done per character in the given encoding.
     *
     * @param string $haystack <p>The string to shorten.</p>
     * @param string $needle   <p>The suffix that should be removed.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>$haystack without the trailing $needle, or the unchanged $haystack
     *                if it is empty, $needle is empty or $haystack does not end with $needle.</p>
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to search in or nothing to remove
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        $ends_with_needle = \substr($haystack, -\strlen($needle)) === $needle;
        if ($ends_with_needle === false) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            // fast path: call the "mb_" functions directly
            $chars_to_keep = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $chars_to_keep);
        }

        $chars_to_keep = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $chars_to_keep, $encoding);
    }
11108
11109
    /**
     * Swaps the case of every character: lowercase becomes uppercase and vice versa.
     *
     * The result is built by XOR-ing the lower-cased, the upper-cased and the
     * original byte string with each other.
     * NOTE(review): this presumably requires both case mappings to have the same
     * byte length as $str - confirm for characters whose case mapping changes the length.
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string each character's case swapped
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            // fast path: call the "mb_" functions directly
            $lower = \mb_strtolower($str);
            $upper = \mb_strtoupper($str);
        } else {
            $lower = self::strtolower($str, $encoding);
            $upper = self::strtoupper($str, $encoding);
        }

        return (string) ($lower ^ $upper ^ $str);
    }
11136
11137
    /**
     * Checks whether symfony-polyfills are used.
     *
     * A polyfill is assumed when "mb_strlen()" or "iconv()" exists as a function
     * although the matching extension ("mbstring" / "iconv") is not loaded.
     *
     * @return bool
     *              <strong>true</strong> if in use, <strong>false</strong> otherwise
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstring_is_polyfilled = \extension_loaded('mbstring') === false
                                  &&
                                  \function_exists('mb_strlen');

        $iconv_is_polyfilled = \extension_loaded('iconv') === false
                               &&
                               \function_exists('iconv');

        return $mbstring_is_polyfilled || $iconv_is_polyfilled;
    }
11160
11161
    /**
     * Replaces every tab character in the string with spaces.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length [optional] <p>Number of spaces that replace one tab;
     *                           values below 0 are handled like 0 (the tabs are removed).</p>
     *
     * @return string
     *                <p>The string with all tabs replaced.</p>
     */
    public static function tabs_to_spaces(string $str, int $tab_length = 4): string
    {
        // "str_repeat()" rejects a negative multiplier (warning in PHP 7, ValueError in PHP 8),
        // so the length is clamped to zero before it is used
        $spaces = \str_repeat(' ', \max(0, $tab_length));

        return \str_replace("\t", $spaces, $str);
    }
11179
11180
    /**
     * Title-cases the string: the first character of each word becomes uppercase,
     * all other characters become lowercase.
     *
     * Without a language and without the "keep string length" option the work is
     * done by "mb_convert_case()", otherwise by "UTF8::str_titleize()".
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>A string with all characters of $str being title-cased.</p>
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        // special handling requested -> "mb_convert_case()" is not enough
        if ($lang !== null || $try_to_keep_the_string_length !== false) {
            return self::str_titleize(
                $str,
                null,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length,
                false
            );
        }

        if ($encoding === 'UTF-8') {
            return \mb_convert_case($str, \MB_CASE_TITLE);
        }

        $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

        return \mb_convert_case($str, \MB_CASE_TITLE, $normalized_encoding);
    }
11226
11227
    /**
     * Deprecated alias: forwards all arguments to "UTF8::to_ascii()".
     *
     * @param string $str       <p>Passed on as first argument.</p>
     * @param string $subst_chr <p>Passed on as second argument.</p>
     * @param bool   $strict    <p>Passed on as third argument.</p>
     *
     * @return string
     *                <p>The result of "UTF8::to_ascii()".</p>
     *
     * @see UTF8::to_ascii()
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
     */
    public static function toAscii(
        string $str,
        string $subst_chr = '?',
        bool $strict = false
    ): string {
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
11246
11247
    /**
     * Deprecated alias: forwards the argument to "UTF8::to_iso8859()".
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *                         <p>The result of "UTF8::to_iso8859()".</p>
     *
     * @see UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function toIso8859($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
11261
11262
    /**
     * Deprecated alias: forwards the argument to "UTF8::to_iso8859()".
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *                         <p>The result of "UTF8::to_iso8859()".</p>
     *
     * @see UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function toLatin1($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
11276
11277
    /**
     * Deprecated alias: forwards the argument to "UTF8::to_utf8()".
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *                         <p>The result of "UTF8::to_utf8()".</p>
     *
     * @see UTF8::to_utf8()
     * @deprecated <p>please use "UTF8::to_utf8()"</p>
     */
    public static function toUTF8($str)
    {
        $converted = self::to_utf8($str);

        return $converted;
    }
11291
11292
    /**
     * Converts a string into ASCII by delegating to "ASCII::to_transliterate()".
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @return string
     *                <p>The result of "ASCII::to_transliterate()".</p>
     */
    public static function to_ascii(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        $transliterated = ASCII::to_transliterate($str, $unknown, $strict);

        return $transliterated;
    }
11309
11310
    /**
     * Converts a value into a boolean, based on its string representation.
     *
     * Rules (checked in this order, surrounding whitespace is ignored):
     * - an empty string is false
     * - "true", "1", "on", "yes" are true and "false", "0", "off", "no" are false (case-insensitive)
     * - a numeric string is true if its value is greater than zero
     * - every other non-empty string is true
     *
     * @param mixed $str <p>The value to convert; it is cast to a string first.</p>
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        // init: trim first, so that e.g. " false " or " -1 " are not treated
        // as "some non-empty string" (=== true) further down
        $str = \trim((string) $str);

        if ($str === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $map = [
            'true'  => true,
            '1'     => true,
            'on'    => true,
            'yes'   => true,
            'false' => false,
            '0'     => false,
            'off'   => false,
            'no'    => false,
        ];

        // one case-insensitive lookup covers the exact and the mixed-case spelling
        $key = \strtolower($str);
        if (isset($map[$key])) {
            return $map[$key];
        }

        if (\is_numeric($str)) {
            return (float) $str > 0;
        }

        // non-empty, not a known keyword and not numeric
        return true;
    }
11351
11352
    /**
     * Converts the given string to a safe filename (and keeps the string case)
     * by delegating to "ASCII::to_filename()".
     *
     * @param string $str
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
     *                                  simply replaced with hyphen.
     * @param string $fallback_char
     *
     * @return string
     *                <p>The result of "ASCII::to_filename()".</p>
     */
    public static function to_filename(
        string $str,
        bool $use_transliterate = false,
        string $fallback_char = '-'
    ): string {
        $filename = ASCII::to_filename($str, $use_transliterate, $fallback_char);

        return $filename;
    }
11373
11374
    /**
     * Converts a string into "ISO-8859"-encoding (Latin-1).
     *
     * Arrays are converted element by element (recursively, keys are kept);
     * everything else is cast to a string and passed to "UTF8::utf8_decode()".
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        $str = (string) $str;

        return $str === '' ? '' : self::utf8_decode($str);
    }
11398
11399
    /**
     * Deprecated alias: forwards the argument to "UTF8::to_iso8859()".
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *                         <p>The result of "UTF8::to_iso8859()".</p>
     *
     * @see UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function to_latin1($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
11413
11414
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decodes UTF-8 codepoints and Unicode escape sequences.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * Steps for a non-empty string:
     * 1. walk over the bytes: complete 2-, 3- and 4-byte sequences are copied, every other
     *    byte >= 0x80 goes through "UTF8::to_utf8_convert_helper()"
     * 2. replace "\uXXXX" escape sequences (incl. surrogate pairs) by the character itself
     * 3. optionally decode html-entities
     *
     * @param string|string[] $str                        <p>Any string or array.</p>
     * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @return string|string[] the UTF-8 encoded string
     */
    public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
    {
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_utf8($value, $decode_html_entity_to_utf8);
            }

            return $str;
        }

        $str = (string) $str;
        if ($str === '') {
            return $str;
        }

        $byte_count = \strlen($str);
        $result = '';
        $pos = 0;

        while ($pos < $byte_count) {
            $lead = $str[$pos];

            if ($lead < "\xC0") {
                // a stray continuation byte (10xxxxxx) needs conversion, everything else is kept as it is
                $result .= ($lead & "\xC0") === "\x80" ? self::to_utf8_convert_helper($lead) : $lead;
                ++$pos;

                continue;
            }

            // length of the UTF-8 sequence this lead byte would start (0 === no valid lead byte)
            if ($lead <= "\xDF") {
                $sequence_length = 2;
            } elseif ($lead <= "\xEF") {
                $sequence_length = 3;
            } elseif ($lead <= "\xF7") {
                $sequence_length = 4;
            } else {
                $sequence_length = 0;
            }

            // "almost sure it's UTF8 already": all following bytes exist and are continuation bytes
            $looks_like_utf8 = $sequence_length !== 0 && ($pos + $sequence_length) <= $byte_count;
            for ($offset = 1; $looks_like_utf8 && $offset < $sequence_length; ++$offset) {
                $next = $str[$pos + $offset];
                $looks_like_utf8 = $next >= "\x80" && $next <= "\xBF";
            }

            if ($looks_like_utf8) {
                $result .= \substr($str, $pos, $sequence_length);
                $pos += $sequence_length;
            } else {
                // not valid UTF8 - convert the lead byte only, the next bytes are checked on their own
                $result .= self::to_utf8_convert_helper($lead);
                ++$pos;
            }
        }

        // decode unicode escape sequences + unicode surrogate pairs
        $result = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // single "\uXXXX" escape sequence
                    $code_point = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair: http://unicode.org/faq/utf_bom.html#utf16-4
                    $code_point = ((int) \hexdec($matches[1]) << 10)
                                  + (int) \hexdec($matches[2])
                                  + 0x10000
                                  - (0xD800 << 10)
                                  - 0xDC00;
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($code_point < 0x80) {
                    return (string) self::chr($code_point);
                }

                if ($code_point < 0xA0) {
                    /** @noinspection UnnecessaryCastingInspection */
                    return (string) self::chr(0xC0 | $code_point >> 6) . (string) self::chr(0x80 | $code_point & 0x3F);
                }

                return self::decimal_to_chr($code_point);
            },
            $result
        );

        // "preg_replace_callback()" failed
        if ($result === null) {
            return '';
        }

        // decode UTF-8 codepoints
        if ($decode_html_entity_to_utf8 === true) {
            $result = self::html_entity_decode($result);
        }

        return $result;
    }
11547
11548
    /**
     * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
     *
     * INFO: This is slower than "trim()"
     *
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
     * but the check for ASCII (7-Bit) costs more time than we can save here.
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars [optional] <p>Optional characters to be stripped;
     *                           null or an empty string means "whitespace".</p>
     *
     * @return string the trimmed string
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // strict check: a loose "if ($chars)" would ignore the valid char-list "0"
        $use_custom_chars = $chars !== null && $chars !== '';

        if (self::$SUPPORT['mbstring'] === true) {
            if ($use_custom_chars === true) {
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2
                $pattern = "^[{$chars}]+|[{$chars}]+\$";
            } else {
                $pattern = '^[\\s]+|[\\s]+$';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // fallback without "mbstring": same pattern, but quoted for the "/" delimiter
        if ($use_custom_chars === true) {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
11589
11590
    /**
     * Makes string's first char uppercase.
     *
     * The string is split into its first character and the rest; only the first
     * character is upper-cased, the rest is appended unchanged.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        // plain "mb_strtoupper()" is enough, if no special handling was requested
        $use_mb_functions = $lang === null && $try_to_keep_the_string_length === false;

        if ($encoding === 'UTF-8') {
            $rest = (string) \mb_substr($str, 1);
            $first_char = (string) \mb_substr($str, 0, 1);

            $first_char_upper = $use_mb_functions === true
                ? \mb_strtoupper($first_char)
                : self::strtoupper($first_char, $encoding, false, $lang, $try_to_keep_the_string_length);

            return $first_char_upper . $rest;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $rest = (string) self::substr($str, 1, null, $encoding);

        if ($use_mb_functions === true) {
            $first_char_upper = \mb_strtoupper(
                (string) \mb_substr($str, 0, 1, $encoding),
                $encoding
            );
        } else {
            $first_char_upper = self::strtoupper(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );
        }

        return $first_char_upper . $rest;
    }
11659
11660
    /**
     * Deprecated alias: forwards all arguments to "UTF8::ucfirst()".
     *
     * @param string $str        <p>Passed on as first argument.</p>
     * @param string $encoding   <p>Passed on as second argument.</p>
     * @param bool   $clean_utf8 <p>Passed on as third argument.</p>
     *
     * @return string
     *                <p>The result of "UTF8::ucfirst()".</p>
     *
     * @see UTF8::ucfirst()
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
     */
    public static function ucword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        $result = self::ucfirst($str, $encoding, $clean_utf8);

        return $result;
    }
11679
11680
    /**
     * Uppercase for all words in the string.
     *
     * Plain ASCII input without exceptions and without an extra char-list is handled
     * by the native "ucwords()"; everything else is split via "UTF8::str_to_words()"
     * and every non-excluded part is passed through "UTF8::ucfirst()".
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
     * @param string   $char_list  [optional] <p>Additional chars that contains to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // strict check: "!$str" would also swallow the string "0"
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        // strict checks again: a char-list or an exception "0" must not be ignored
        $use_php_default_functions = $char_list === '' && \implode('', $exceptions) === '';

        if (
            $use_php_default_functions === true
            &&
            ASCII::is_ascii($str) === true
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        foreach ($words as $word) {
            // skip empty parts only, the word "0" must be kept
            if ($word === '') {
                continue;
            }

            if (
                $use_exceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words_str .= self::ucfirst($word, $encoding);
            } else {
                $words_str .= $word;
            }
        }

        return $words_str;
    }
11744
11745
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * One decode pass is: "UTF8::to_utf8()" -> "UTF8::html_entity_decode()" ->
     * "urldecode()" -> "UTF8::fix_simple_utf8()".
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // without one of these markers there is nothing to url- or entity-decode
        $has_encoded_parts = \strpos($str, '&') !== false
                             ||
                             \strpos($str, '%') !== false
                             ||
                             \strpos($str, '+') !== false
                             ||
                             \strpos($str, '\u') !== false;

        if ($has_encoded_parts === false) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // always one pass; with "$multi_decode" repeat until a pass changes nothing anymore
        do {
            $str_before_pass = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::fix_simple_utf8(
                \urldecode(
                    self::html_entity_decode(
                        self::to_utf8($str),
                        \ENT_QUOTES | \ENT_HTML5
                    )
                )
            );
        } while ($multi_decode === true && $str_before_pass !== $str);

        return $str;
    }
11816
11817
    /**
11818
     * Return a array with "urlencoded"-win1252 -> UTF-8
11819
     *
11820
     * @return string[]
11821
     *
11822
     * @deprecated <p>please use the "UTF8::urldecode()" function to decode a string</p>
11823
     */
11824 2
    public static function urldecode_fix_win1252_chars(): array
11825
    {
11826
        return [
11827 2
            '%20' => ' ',
11828
            '%21' => '!',
11829
            '%22' => '"',
11830
            '%23' => '#',
11831
            '%24' => '$',
11832
            '%25' => '%',
11833
            '%26' => '&',
11834
            '%27' => "'",
11835
            '%28' => '(',
11836
            '%29' => ')',
11837
            '%2A' => '*',
11838
            '%2B' => '+',
11839
            '%2C' => ',',
11840
            '%2D' => '-',
11841
            '%2E' => '.',
11842
            '%2F' => '/',
11843
            '%30' => '0',
11844
            '%31' => '1',
11845
            '%32' => '2',
11846
            '%33' => '3',
11847
            '%34' => '4',
11848
            '%35' => '5',
11849
            '%36' => '6',
11850
            '%37' => '7',
11851
            '%38' => '8',
11852
            '%39' => '9',
11853
            '%3A' => ':',
11854
            '%3B' => ';',
11855
            '%3C' => '<',
11856
            '%3D' => '=',
11857
            '%3E' => '>',
11858
            '%3F' => '?',
11859
            '%40' => '@',
11860
            '%41' => 'A',
11861
            '%42' => 'B',
11862
            '%43' => 'C',
11863
            '%44' => 'D',
11864
            '%45' => 'E',
11865
            '%46' => 'F',
11866
            '%47' => 'G',
11867
            '%48' => 'H',
11868
            '%49' => 'I',
11869
            '%4A' => 'J',
11870
            '%4B' => 'K',
11871
            '%4C' => 'L',
11872
            '%4D' => 'M',
11873
            '%4E' => 'N',
11874
            '%4F' => 'O',
11875
            '%50' => 'P',
11876
            '%51' => 'Q',
11877
            '%52' => 'R',
11878
            '%53' => 'S',
11879
            '%54' => 'T',
11880
            '%55' => 'U',
11881
            '%56' => 'V',
11882
            '%57' => 'W',
11883
            '%58' => 'X',
11884
            '%59' => 'Y',
11885
            '%5A' => 'Z',
11886
            '%5B' => '[',
11887
            '%5C' => '\\',
11888
            '%5D' => ']',
11889
            '%5E' => '^',
11890
            '%5F' => '_',
11891
            '%60' => '`',
11892
            '%61' => 'a',
11893
            '%62' => 'b',
11894
            '%63' => 'c',
11895
            '%64' => 'd',
11896
            '%65' => 'e',
11897
            '%66' => 'f',
11898
            '%67' => 'g',
11899
            '%68' => 'h',
11900
            '%69' => 'i',
11901
            '%6A' => 'j',
11902
            '%6B' => 'k',
11903
            '%6C' => 'l',
11904
            '%6D' => 'm',
11905
            '%6E' => 'n',
11906
            '%6F' => 'o',
11907
            '%70' => 'p',
11908
            '%71' => 'q',
11909
            '%72' => 'r',
11910
            '%73' => 's',
11911
            '%74' => 't',
11912
            '%75' => 'u',
11913
            '%76' => 'v',
11914
            '%77' => 'w',
11915
            '%78' => 'x',
11916
            '%79' => 'y',
11917
            '%7A' => 'z',
11918
            '%7B' => '{',
11919
            '%7C' => '|',
11920
            '%7D' => '}',
11921
            '%7E' => '~',
11922
            '%7F' => '',
11923
            '%80' => '`',
11924
            '%81' => '',
11925
            '%82' => '‚',
11926
            '%83' => 'ƒ',
11927
            '%84' => '„',
11928
            '%85' => '…',
11929
            '%86' => '†',
11930
            '%87' => '‡',
11931
            '%88' => 'ˆ',
11932
            '%89' => '‰',
11933
            '%8A' => 'Š',
11934
            '%8B' => '‹',
11935
            '%8C' => 'Œ',
11936
            '%8D' => '',
11937
            '%8E' => 'Ž',
11938
            '%8F' => '',
11939
            '%90' => '',
11940
            '%91' => '‘',
11941
            '%92' => '’',
11942
            '%93' => '“',
11943
            '%94' => '”',
11944
            '%95' => '•',
11945
            '%96' => '–',
11946
            '%97' => '—',
11947
            '%98' => '˜',
11948
            '%99' => '™',
11949
            '%9A' => 'š',
11950
            '%9B' => '›',
11951
            '%9C' => 'œ',
11952
            '%9D' => '',
11953
            '%9E' => 'ž',
11954
            '%9F' => 'Ÿ',
11955
            '%A0' => '',
11956
            '%A1' => '¡',
11957
            '%A2' => '¢',
11958
            '%A3' => '£',
11959
            '%A4' => '¤',
11960
            '%A5' => '¥',
11961
            '%A6' => '¦',
11962
            '%A7' => '§',
11963
            '%A8' => '¨',
11964
            '%A9' => '©',
11965
            '%AA' => 'ª',
11966
            '%AB' => '«',
11967
            '%AC' => '¬',
11968
            '%AD' => '',
11969
            '%AE' => '®',
11970
            '%AF' => '¯',
11971
            '%B0' => '°',
11972
            '%B1' => '±',
11973
            '%B2' => '²',
11974
            '%B3' => '³',
11975
            '%B4' => '´',
11976
            '%B5' => 'µ',
11977
            '%B6' => '¶',
11978
            '%B7' => '·',
11979
            '%B8' => '¸',
11980
            '%B9' => '¹',
11981
            '%BA' => 'º',
11982
            '%BB' => '»',
11983
            '%BC' => '¼',
11984
            '%BD' => '½',
11985
            '%BE' => '¾',
11986
            '%BF' => '¿',
11987
            '%C0' => 'À',
11988
            '%C1' => 'Á',
11989
            '%C2' => 'Â',
11990
            '%C3' => 'Ã',
11991
            '%C4' => 'Ä',
11992
            '%C5' => 'Å',
11993
            '%C6' => 'Æ',
11994
            '%C7' => 'Ç',
11995
            '%C8' => 'È',
11996
            '%C9' => 'É',
11997
            '%CA' => 'Ê',
11998
            '%CB' => 'Ë',
11999
            '%CC' => 'Ì',
12000
            '%CD' => 'Í',
12001
            '%CE' => 'Î',
12002
            '%CF' => 'Ï',
12003
            '%D0' => 'Ð',
12004
            '%D1' => 'Ñ',
12005
            '%D2' => 'Ò',
12006
            '%D3' => 'Ó',
12007
            '%D4' => 'Ô',
12008
            '%D5' => 'Õ',
12009
            '%D6' => 'Ö',
12010
            '%D7' => '×',
12011
            '%D8' => 'Ø',
12012
            '%D9' => 'Ù',
12013
            '%DA' => 'Ú',
12014
            '%DB' => 'Û',
12015
            '%DC' => 'Ü',
12016
            '%DD' => 'Ý',
12017
            '%DE' => 'Þ',
12018
            '%DF' => 'ß',
12019
            '%E0' => 'à',
12020
            '%E1' => 'á',
12021
            '%E2' => 'â',
12022
            '%E3' => 'ã',
12023
            '%E4' => 'ä',
12024
            '%E5' => 'å',
12025
            '%E6' => 'æ',
12026
            '%E7' => 'ç',
12027
            '%E8' => 'è',
12028
            '%E9' => 'é',
12029
            '%EA' => 'ê',
12030
            '%EB' => 'ë',
12031
            '%EC' => 'ì',
12032
            '%ED' => 'í',
12033
            '%EE' => 'î',
12034
            '%EF' => 'ï',
12035
            '%F0' => 'ð',
12036
            '%F1' => 'ñ',
12037
            '%F2' => 'ò',
12038
            '%F3' => 'ó',
12039
            '%F4' => 'ô',
12040
            '%F5' => 'õ',
12041
            '%F6' => 'ö',
12042
            '%F7' => '÷',
12043
            '%F8' => 'ø',
12044
            '%F9' => 'ù',
12045
            '%FA' => 'ú',
12046
            '%FB' => 'û',
12047
            '%FC' => 'ü',
12048
            '%FD' => 'ý',
12049
            '%FE' => 'þ',
12050
            '%FF' => 'ÿ',
12051
        ];
12052
    }
12053
12054
    /**
     * Decodes a UTF-8 string to ISO-8859-1.
     *
     * Two-byte sequences that decode to a value below 256 are converted to the matching single byte.
     * Everything that cannot be represented (two-byte sequences decoding to 256 or more, three- and
     * four-byte sequences, and a two-byte lead byte that is cut off at the end of the input) is
     * replaced by "?". Every other byte is copied unchanged.
     *
     * @param string $str             <p>The input string.</p>
     * @param bool   $keep_utf8_chars [optional] <p>
     *                                If true, the original string is returned whenever the decoded
     *                                result is not shorter (measured with self::strlen()) than the input.
     *                                </p>
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // save for later comparison
        $str_backup = $str;
        $len = \strlen($str);

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $no_char_found = '?';

        // The string is rewritten in place: $i is the read offset, $j the write offset ($j <= $i),
        // so the byte length of $str never changes inside the loop.
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    if ($i + 1 >= $len) {
                        // Lead byte of a two-byte sequence without its continuation byte:
                        // do not read past the end of the string, emit the placeholder instead.
                        $str[$j] = $no_char_found;

                        break;
                    }

                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $no_char_found;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // four-byte sequence: skip one extra byte, then fall through to the three-byte handling
                    ++$i;

                // no break

                case "\xE0":
                    $str[$j] = $no_char_found;
                    $i += 2;

                    break;

                default:
                    $str[$j] = $str[$i];
            }
        }

        // only the first $j bytes hold decoded output
        /** @var false|string $return - needed for PhpStan (stubs error) */
        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keep_utf8_chars === true
            &&
            (int) self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
12124
12125
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *                <p>The encoded string, or an empty string if the input is empty or the
     *                underlying utf8_encode() call (e.g. a polyfill) returns false.</p>
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /** @var false|string $encoded - the polyfill maybe return false */
        $encoded = \utf8_encode($str);

        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        return $encoded === false ? '' : $encoded;
    }
12149
12150
    /**
     * Fixes UTF-8 / Windows-1252 characters by delegating to self::fix_simple_utf8().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *
     * @deprecated <p>please use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        // kept only for backward compatibility; all work happens in the replacement method
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
12163
12164
    /**
     * Returns the table of UTF-8 whitespace characters.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @return string[]
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
     *                  as defined in above URL
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
12177
12178
    /**
     * Limit the number of words in a string.
     *
     * Words are runs of non-whitespace characters. If the string holds more than $limit words,
     * it is cut after the last allowed word, trailing whitespace is removed and $str_add_on is appended.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $limit      <p>The limit of words as integer.</p>
     * @param string $str_add_on <p>Replacement for the stripped string.</p>
     *
     * @return string
     *                <p>An empty string if the input is empty or $limit is lower than 1.</p>
     */
    public static function words_limit(
        string $str,
        int $limit = 100,
        string $str_add_on = '…'
    ): string {
        if ($limit < 1 || $str === '') {
            return '';
        }

        // optional leading whitespace, followed by 1..$limit "word + trailing whitespace" groups
        $pattern = '/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u';
        \preg_match($pattern, $str, $matches);

        // no match at all: hand the input back untouched
        if (!isset($matches[0])) {
            return $str;
        }

        $head = $matches[0];

        // the match already spans the whole string, so nothing was cut off
        if (\mb_strlen($str) === (int) \mb_strlen($head)) {
            return $str;
        }

        return \rtrim($head) . $str_add_on;
    }
12208
12209
    /**
     * Wraps a string to a given number of characters
     *
     * The input is reduced to a one-byte-per-character mask, the native wordwrap() is run on that mask,
     * and the resulting break positions are then replayed on the real characters.
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @return string
     *                <p>The given string wrapped at the specified column
     *                (an empty string if $str or $break is empty).</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($str === '' || $break === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }

        // $chars holds the real pieces (characters and existing breaks);
        // $mask mirrors it with one byte per piece: '#' = break, ' ' = space, '?' = anything else
        $chars = [];
        $mask = '';
        foreach ($segments as $segment_index => $segment) {
            if ($segment_index) {
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        // let the native function decide where the breaks go
        $mask = \wordwrap($mask, $width, '#', $cut);
        $mask_length = \mb_strlen($mask);

        $wrapped = '';
        $char_index = 0;
        $mask_index = -1;
        $marker_pos = -1;
        while (($marker_pos = \mb_strpos($mask, '#', $marker_pos + 1)) !== false) {
            // copy everything up to the next break marker
            for (++$mask_index; $mask_index < $marker_pos; ++$mask_index) {
                $wrapped .= $chars[$char_index];
                unset($chars[$char_index++]);

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($mask_index > $mask_length) {
                    break 2;
                }
            }

            // the marker replaced an existing break or a space: drop that piece
            $pending = $chars[$char_index];
            if ($pending === $break || $pending === ' ') {
                unset($chars[$char_index++]);
            }

            $wrapped .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($marker_pos > $mask_length) {
                break;
            }
        }

        // whatever is left belongs to the last line
        return $wrapped . \implode('', $chars);
    }
12295
12296
    /**
     * Line-Wrap the string after $limit, but split the string by "$delimiter" before ...
     *    ... so that we wrap the per line.
     *
     * Each piece is wrapped on its own with self::wordwrap() and the pieces are joined again
     * with $delimiter (or "\n" when no delimiter was given).
     *
     * @param string      $str             <p>The input string.</p>
     * @param int         $width           [optional] <p>The column width.</p>
     * @param string      $break           [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool        $cut             [optional] <p>
     *                                     If the cut is set to true, the string is
     *                                     always wrapped at or before the specified width. So if you have
     *                                     a word that is larger than the given width, it is broken apart.
     *                                     </p>
     * @param bool        $add_final_break [optional] <p>
     *                                     If this flag is true, then the method will add a $break at the end
     *                                     of the result string.
     *                                     </p>
     * @param string|null $delimiter       [optional] <p>
     *                                     You can change the default behavior, where we split the string by newline.
     *                                     </p>
     *
     * @return string
     */
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $add_final_break = true,
        string $delimiter = null
    ): string {
        // default: split on any newline flavour, otherwise on the caller's delimiter
        $lines = $delimiter === null
            ? \preg_split('/\\r\\n|\\r|\\n/', $str)
            : \explode($delimiter, $str);

        $wrapped_lines = [];
        if ($lines !== false) {
            foreach ($lines as $line) {
                $wrapped_lines[] = self::wordwrap($line, $width, $break, $cut);
            }
        }

        $glue = $delimiter ?? "\n";
        $suffix = $add_final_break ? $break : '';

        return \implode($glue, $wrapped_lines) . $suffix;
    }
12347
12348
    /**
     * Returns the list of Unicode White Space characters.
     *
     * @return string[] an array with numeric code point as key and White Space Character as value
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
12357
12358
    /**
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
     *
     * The byte-wise check rejects: illegal lead octets, missing or unexpected continuation octets,
     * non-shortest forms, 5- and 6-octet sequences, surrogates (U+D800..U+DFFF), code points above
     * U+10FFFF, and a string that ends in the middle of a multi-octet sequence.
     *
     * @see http://hsivonen.iki.fi/php-utf8/
     *
     * @param string $str    <p>The string to be checked.</p>
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function is_utf8_string(string $str, bool $strict = false)
    {
        if ($str === '') {
            return true;
        }

        if ($strict === true) {
            $is_binary = self::is_binary($str, true);

            if ($is_binary && self::is_utf16($str, false) !== false) {
                return false;
            }

            if ($is_binary && self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        // NOTE(review): this branch is taken when pcre_utf8_support() is NOT true, yet it relies on
        // the /u modifier - the condition looks inverted; confirm against pcre_utf8_support().
        if (self::pcre_utf8_support() !== true) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^./us', $str) === 1;
        }

        $mState = 0; // cached expected number of octets after the current octet
        // until the beginning of the next UTF8 character sequence
        $mUcs4 = 0; // cached Unicode character
        $mBytes = 1; // cached expected number of octets in the current sequence

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $len = \strlen($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $len; ++$i) {
            $in = self::$ORD[$str[$i]];

            if ($mState === 0) {
                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if ((0x80 & $in) === 0) {
                    // US-ASCII, pass straight through.
                    $mBytes = 1;
                } elseif ((0xE0 & $in) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $mUcs4 = ($in & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;
                } elseif ((0xF0 & $in) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $mUcs4 = ($in & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;
                } elseif ((0xF8 & $in) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $mUcs4 = ($in & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;
                } elseif ((0xFC & $in) === 0xF8) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = ($in & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;
                } elseif ((0xFE & $in) === 0xFC) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = ($in & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;
                } else {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }
            } elseif ((0xC0 & $in) === 0x80) {
                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence

                // Legal continuation: merge its 6 payload bits into the code point.
                $shift = ($mState - 1) * 6;
                $mUcs4 |= ($in & 0x0000003F) << $shift;

                // End of the multi-octet sequence. mUcs4 now contains the final
                // Unicode code point to be output.
                if (--$mState === 0) {
                    // Check for illegal sequences and code points.
                    //
                    // From Unicode 3.1, non-shortest form is illegal
                    if (
                        ($mBytes === 2 && $mUcs4 < 0x0080)
                        ||
                        ($mBytes === 3 && $mUcs4 < 0x0800)
                        ||
                        ($mBytes === 4 && $mUcs4 < 0x10000)
                        ||
                        ($mBytes > 4)
                        ||
                        // From Unicode 3.2, surrogate characters are illegal.
                        (($mUcs4 & 0xFFFFF800) === 0xD800)
                        ||
                        // Code points outside the Unicode range are illegal.
                        ($mUcs4 > 0x10FFFF)
                    ) {
                        return false;
                    }

                    // initialize UTF8 cache
                    $mState = 0;
                    $mUcs4 = 0;
                    $mBytes = 1;
                }
            } else {
                // ((0xC0 & (*in) != 0x80) && (mState != 0))
                // Incomplete multi-octet sequence.
                return false;
            }
        }

        // A non-zero state here means the input ended in the middle of a multi-octet
        // sequence (e.g. a lone "\xC3"), which is not valid UTF-8.
        return $mState === 0;
    }
12505
12506
    /**
     * Applies the case-folding replacement tables to a string.
     *
     * The common table (self::$COMMON_CASE_FOLD) is always applied; the full table
     * ("caseFolding_full" data file, loaded once and kept in a static variable) is applied
     * in addition when $use_full_case_fold is truthy.
     *
     * @param string $str
     * @param bool   $use_lowercase      <p>Use uppercase by default, otherwise use lowercase.</p>
     * @param bool   $use_full_case_fold <p>Convert not only common cases.</p>
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function fixStrCaseHelper(
        string $str,
        $use_lowercase = false,
        $use_full_case_fold = false
    ) {
        // only a strict boolean true selects the lowercase direction
        $to_lower = $use_lowercase === true;

        if ($to_lower) {
            $search = self::$COMMON_CASE_FOLD['upper'];
            $replace = self::$COMMON_CASE_FOLD['lower'];
        } else {
            $search = self::$COMMON_CASE_FOLD['lower'];
            $replace = self::$COMMON_CASE_FOLD['upper'];
        }

        $str = \str_replace($search, $replace, $str);

        if ($use_full_case_fold) {
            static $FULL_CASE_FOLD = null;
            if ($FULL_CASE_FOLD === null) {
                $FULL_CASE_FOLD = self::getData('caseFolding_full');
            }

            // index 0 is replaced by index 1 for lowercase, the other way round otherwise
            $from = $to_lower ? 0 : 1;
            $to = $to_lower ? 1 : 0;

            $str = \str_replace($FULL_CASE_FOLD[$from], $FULL_CASE_FOLD[$to], $str);
        }

        return $str;
    }
12552
12553
    /**
     * Loads a data file from "/data/*.php" (relative to this file) and returns what it returns.
     *
     * @param string $file <p>The file name without directory and without the ".php" extension.</p>
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function getData(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
12569
12570
    /**
     * Builds the emoji lookup caches on first use.
     *
     * Loads the "emoji" data file if needed, sorts it by key length (longest key first) and
     * fills the key, value and reversible-placeholder caches.
     *
     * @return true|null
     *                   <p>true if the caches were built by this call, null if they already existed.</p>
     */
    private static function initEmojiData()
    {
        // already initialized: nothing to do
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        // longest keys first
        \uksort(
            self::$EMOJI,
            static function (string $left, string $right): int {
                return \strlen($right) <=> \strlen($left);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = \array_values(self::$EMOJI);

        // one placeholder per key, derived from the key's CRC32 checksum
        foreach (self::$EMOJI_KEYS_CACHE as $emoji_key) {
            $checksum = \crc32($emoji_key);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev((string) $checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
12600
12601
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * @return bool
     *              <p>true only if the MB_OVERLOAD_STRING constant exists and its bit is set
     *              in the "mbstring.func_overload" INI value.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function mbstring_overloaded()
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */

        // without the constant there is nothing that could be overloaded
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $overload_flags = (int) @\ini_get('mbstring.func_overload');

        return (bool) ($overload_flags & \MB_OVERLOAD_STRING);
    }
12620
12621
    /**
     * Filters a list of strings and returns the remaining values re-indexed from 0.
     *
     * @param array    $strings             <p>The strings to filter.</p>
     * @param bool     $remove_empty_values <p>If true, values that are empty after trim() are dropped.</p>
     * @param int|null $remove_short_values <p>
     *                                      If not null, values whose mb_strlen() is lower than or equal to
     *                                      this number are dropped.
     *                                      </p>
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function reduce_string_array(
        array $strings,
        bool $remove_empty_values,
        int $remove_short_values = null
    ) {
        // init
        $return = [];

        // iterate by value: the elements are only read, so no reference is needed
        foreach ($strings as $str) {
            if (
                $remove_short_values !== null
                &&
                \mb_strlen($str) <= $remove_short_values
            ) {
                continue;
            }

            if (
                $remove_empty_values === true
                &&
                \trim($str) === ''
            ) {
                continue;
            }

            $return[] = $str;
        }

        return $return;
    }
12661
    /**
     * Build a regex fragment that matches any one of the characters in $s,
     * merged with an optional, already prepared character-class body.
     *
     * The result is normally a bracket expression ("[...]"). Elements returned by
     * str_split() that are longer than one character cannot live inside a bracket
     * expression and are added as alternatives of a non-capturing group instead
     * ("(?:[...]|x|y)"). Results are memoized in a static cache.
     *
     * @param string $s     <p>Characters to match; short characters are escaped via preg_quote() for the "/" delimiter.</p>
     * @param string $class <p>Character-class body that is inserted as-is (it is NOT escaped).</p>
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function rxClass(string $s, string $class = '')
    {
        // Cached per $class and per $s separately: a flat "$s . $class" key would let
        // different argument pairs share one entry, e.g. $s = "\" with $class = "s"
        // versus $s = "" with $class = "\s", although they need different fragments.
        static $RX_CLASS_CACHE = [];

        if (isset($RX_CLASS_CACHE[$class][$s])) {
            return $RX_CLASS_CACHE[$class][$s];
        }

        // index 0 collects the bracket-expression body, further entries are alternatives
        $class_array = [$class];

        // iterate by value: the split result is a temporary and $s must stay intact
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a "-" at the very beginning of a bracket expression is a literal dash
                $class_array[0] = '-' . $class_array[0];
            } elseif (!isset($char[2])) {
                // at most two bytes long: escape regex meta characters
                $class_array[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                // one character made of three or more bytes: used unescaped
                $class_array[0] .= $char;
            } else {
                // more than one character: must become its own alternative
                $class_array[] = $char;
            }
        }

        // strict comparison, so that a body of "0" is wrapped in brackets like any other
        if ($class_array[0] !== '') {
            $class_array[0] = '[' . $class_array[0] . ']';
        }

        if (\count($class_array) === 1) {
            $return = $class_array[0];
        } else {
            $return = '(?:' . \implode('|', $class_array) . ')';
        }

        $RX_CLASS_CACHE[$class][$s] = $return;

        return $return;
    }
12711
12712
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * Splits $names at $delimiter and upper-cases the first character of every part,
     * except for parts that
     * - are exactly one of the known particles (e.g. "von", "de", "the"),
     * - start with one of the known prefixes (e.g. "mc", "d'", "al-"), or
     * - start with one of the known particles while the delimiter is "-".
     * The parts are joined again with the same delimiter.
     *
     * @param string $names     <p>The (lowercase) name(s) to fix.</p>
     * @param string $delimiter <p>Separator between the name parts; an empty delimiter yields an empty string.</p>
     * @param string $encoding  <p>Encoding that is used for the prefix lookups.</p>
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function str_capitalize_name_helper(
        string $names,
        string $delimiter,
        string $encoding = 'UTF-8'
    ) {
        // explode() cannot split on an empty delimiter (warning + false on PHP 7,
        // ValueError on PHP 8), so return the historical fallback value explicitly.
        if ($delimiter === '') {
            return '';
        }

        $name_helper_array = \explode($delimiter, $names);

        $special_cases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // Beginnings that keep a part untouched; with "-" as delimiter the particles
        // count as beginnings as well.
        $protected_beginnings = $special_cases['prefixes'];
        if ($delimiter === '-') {
            $protected_beginnings = \array_merge($special_cases['names'], $protected_beginnings);
        }

        foreach ($name_helper_array as $index => $name) {
            if (\in_array($name, $special_cases['names'], true)) {
                continue;
            }

            foreach ($protected_beginnings as $beginning) {
                if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                    // one match is enough, go on with the next name part
                    continue 2;
                }
            }

            // NOTE(review): $encoding is not forwarded to ucfirst() here - confirm
            // whether that is intentional before changing it.
            $name_helper_array[$index] = self::ucfirst($name);
        }

        return \implode($delimiter, $name_helper_array);
    }
12806
12807
    /**
     * Generic case-sensitive transformation for collation matching.
     *
     * The string is decomposed (Unicode normalization form D) and all nonspacing
     * marks (\p{Mn}, e.g. combining accents) are removed afterwards, so that an
     * accented letter folds to its base letter.
     *
     * @param string $str <p>The input string</p>
     *
     * @return string|null
     *                     <p>The folded string, an empty string if the input could not be normalized,
     *                     or null if preg_replace() fails.</p>
     */
    private static function strtonatfold(string $str)
    {
        /** @noinspection PhpUndefinedClassInspection */
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        // normalize() reports failure with false, which is not a valid subject for
        // preg_replace() (TypeError under strict types, silent coercion otherwise).
        if ($decomposed === false) {
            return '';
        }

        return \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
12823
12824
    /**
     * Convert one input value into its UTF-8 byte sequence.
     *
     * Ordinals that are listed in the WIN1252_TO_UTF8 table are translated through
     * that table; every other value is encoded as a two-byte sequence that is
     * built from its ordinal.
     *
     * @param int|string $input <p>Key into the ORD lookup table (presumably a single raw byte - verify against callers).</p>
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function to_utf8_convert_helper($input)
    {
        // lazy-load the lookup tables on first use
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];

        // found in Windows-1252 special cases
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) {
            return '' . self::$WIN1252_TO_UTF8[$ordC1];
        }

        // The "|" and "&" below are intentional: applied to strings they work
        // byte-wise and set the marker bits of the lead / continuation byte.
        //
        // The offset is truncated explicitly, because a fractional float as array
        // offset is deprecated since PHP 8.1.
        $cc1 = self::$CHR[(int) ($ordC1 / 64)] | "\xC0";
        $cc2 = ((string) $input & "\x3F") | "\x80";

        return $cc1 . $cc2;
    }
12860
12861
    /**
     * Rewrite non-standard "%uXXXX" escapes (three or four hex digits) into
     * hexadecimal HTML entities ("&#xXXXX;").
     *
     * Strings without such an escape are returned unchanged.
     *
     * @param string $str <p>The (partially) url-encoded input.</p>
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function urldecode_unicode_helper(string $str)
    {
        $unicode_escape_rx = '/%u([0-9a-fA-F]{3,4})/';

        // nothing to rewrite: hand the input back untouched
        if (!\preg_match($unicode_escape_rx, $str)) {
            return $str;
        }

        return (string) \preg_replace($unicode_escape_rx, '&#x\\1;', $str);
    }
12877
}
12878