Passed
Push — master ( c5aae7...acc240 )
by Lars
09:56 queued 03:00
created

UTF8   F

Complexity

Total Complexity 1699

Size/Duplication

Total Lines 12891
Duplicated Lines 0 %

Test Coverage

Coverage 79%

Importance

Changes 98
Bugs 52 Features 6
Metric Value
eloc 4359
c 98
b 52
f 6
dl 0
loc 12891
ccs 2881
cts 3647
cp 0.79
rs 0.8
wmc 1699

299 Methods

Rating   Name   Duplication   Size   Complexity  
A max() 0 14 3
A ctype_loaded() 0 3 1
A decimal_to_chr() 0 3 1
A hasBom() 0 3 1
A add_bom_to_string() 0 7 2
A array_change_key_case() 0 23 5
A chr_to_int() 0 3 1
A __construct() 0 2 1
B between() 0 48 8
A char_at() 0 7 2
A chars() 0 3 1
A access() 0 11 4
A callback() 0 3 1
A binary_to_str() 0 12 3
A bom() 0 3 1
B chr_to_decimal() 0 38 8
D chr() 0 101 18
A chunk_split() 0 3 1
A chr_map() 0 5 1
A checkForSupport() 0 47 4
A chr_to_hex() 0 11 3
A file_has_bom() 0 8 2
A filter_input() 0 13 3
A get_unique_string() 0 15 2
A encode_mimeheader() 0 25 5
A count_chars() 0 11 1
A emoji_decode() 0 18 2
B get_file_type() 0 61 7
C filter() 0 59 13
A decode_mimeheader() 0 15 5
A emoji_encode() 0 18 2
B get_random_string() 0 56 10
A fix_utf8() 0 30 4
A first_char() 0 14 4
A css_stripe_media_queries() 0 6 1
A clean() 0 48 6
D getCharDirection() 0 105 118
A filter_var_array() 0 12 2
A codepoints() 0 36 5
A cleanup() 0 25 2
A finfo_loaded() 0 3 1
A fits_inside() 0 3 1
F extract_text() 0 175 34
A filter_var() 0 12 2
F encode() 0 140 37
A fix_simple_utf8() 0 19 4
A has_lowercase() 0 8 2
A filter_input_array() 0 12 3
A getSupportInfo() 0 13 3
A collapse_whitespace() 0 8 2
B file_get_contents() 0 56 11
A chr_size_list() 0 17 3
A is_bom() 0 10 3
A is_hexadecimal() 0 8 2
A has_uppercase() 0 8 2
A isBinary() 0 3 1
A is_utf8() 0 13 4
A lcword() 0 13 1
A html_escape() 0 6 1
C is_utf16() 0 68 16
A isHtml() 0 3 1
A isBase64() 0 3 1
A is_html() 0 14 2
A html_decode() 0 6 1
A isUtf32() 0 3 1
A is_alpha() 0 8 2
A isUtf8() 0 3 1
A is_serialized() 0 11 3
A is_uppercase() 0 8 2
A is_ascii() 0 3 1
A is_blank() 0 8 2
A htmlspecialchars() 0 15 3
A has_whitespace() 0 8 2
A lowerCaseFirst() 0 13 1
B is_binary() 0 35 9
A intlChar_loaded() 0 3 1
A lcfirst() 0 44 5
A is_binary_file() 0 16 3
A intl_loaded() 0 3 1
A mbstring_overloaded() 0 11 2
A html_stripe_empty_tags() 0 6 1
A json_loaded() 0 3 1
A isBom() 0 3 1
A int_to_chr() 0 3 1
A is_lowercase() 0 8 2
A iconv_loaded() 0 3 1
A lcwords() 0 34 6
A isAscii() 0 3 1
A is_empty() 0 3 1
A isUtf16() 0 3 1
C is_utf32() 0 68 16
A is_alphanumeric() 0 8 2
A json_decode() 0 14 2
B is_json() 0 29 8
A int_to_hex() 0 7 2
A json_encode() 0 10 2
A is_base64() 0 20 5
A hex_to_int() 0 14 3
A htmlentities() 0 28 3
A hex_to_chr() 0 3 1
A isJson() 0 3 1
C html_entity_decode() 0 55 13
A str_substr_after_first_separator() 0 28 6
A str_begins() 0 3 1
B str_camelize() 0 70 10
A parse_str() 0 16 4
A str_contains() 0 10 2
B str_to_lines() 0 29 8
A substr_in_byte() 0 18 6
A strnatcasecmp() 0 5 1
A substr_left() 0 15 4
D strlen() 0 99 19
A str_isubstr_last() 0 25 4
A str_replace_beginning() 0 24 6
A remove_left() 0 24 4
B stripos() 0 59 11
A str_offset_exists() 0 10 2
D strrchr() 0 101 20
A to_filename() 0 9 1
A str_iends_with() 0 11 3
A max_chr_width() 0 8 2
C utf8_decode() 0 61 13
A ltrim() 0 27 5
A remove_html() 0 3 1
B str_longest_common_suffix() 0 54 10
C wordwrap() 0 68 14
B ucfirst() 0 57 7
A str_pad_both() 0 12 1
A str_index_last() 0 11 1
A str_substr_last() 0 33 6
A mbstring_loaded() 0 3 1
A str_limit() 0 26 6
A toUTF8() 0 3 1
A string() 0 12 3
D normalize_encoding() 0 142 16
B rxClass() 0 39 8
A str_ensure_right() 0 13 4
B str_titleize_for_humans() 0 160 7
A normalize_whitespace() 0 9 1
A str_starts_with() 0 11 3
A str_humanize() 0 15 1
C substr_count_in_byte() 0 55 15
A strchr() 0 13 1
A strichr() 0 13 1
A str_index_first() 0 11 1
A strlen_in_byte() 0 12 3
A str_ireplace_ending() 0 21 6
A rtrim() 0 27 5
C str_longest_common_substring() 0 76 16
A regex_replace() 0 20 3
A titlecase() 0 35 5
A getData() 0 6 1
A str_iindex_first() 0 11 1
B strtolower() 0 54 10
B urldecode() 0 51 8
A str_isubstr_before_first_separator() 0 19 5
B strrev() 0 43 10
A replace_all() 0 11 2
D substr_replace() 0 124 27
A removeBOM() 0 3 1
A strstr_in_byte() 0 15 4
A str_matches_pattern() 0 3 1
C str_titleize() 0 69 12
A ws() 0 3 1
A str_replace_first() 0 20 2
A toLatin1() 0 3 1
A str_pad_right() 0 12 1
B ucwords() 0 51 9
A to_boolean() 0 35 5
C stristr() 0 68 15
A strncasecmp() 0 10 1
B strwidth() 0 43 8
A str_iends() 0 3 1
A trim() 0 27 5
A str_upper_camelize() 0 8 1
A substr_compare() 0 33 6
C substr_count() 0 62 16
A strnatcmp() 0 9 2
D str_pad() 0 146 16
A urldecode_unicode_helper() 0 8 2
A str_ireplace() 0 18 3
A to_latin1() 0 3 1
A str_replace_ending() 0 24 6
A string_has_bom() 0 10 3
B strtr() 0 34 8
A str_contains_all() 0 23 6
A normalize_line_ending() 0 3 1
A str_isubstr_after_last_separator() 0 26 5
D range() 0 71 23
B strspn() 0 30 10
A strcasecmp() 0 21 1
A str_transliterate() 0 6 1
B rawurldecode() 0 51 8
A str_ends() 0 3 1
B str_capitalize_name_helper() 0 82 10
A utf8_encode() 0 16 3
A normalize_msword() 0 3 1
C str_detect_encoding() 0 111 13
A spaces_to_tabs() 0 11 3
A str_istarts_with() 0 11 3
A str_replace() 0 14 1
A substr_iright() 0 15 4
A replace() 0 11 2
A to_iso8859() 0 16 4
A words_limit() 0 20 5
A strip_tags() 0 18 4
A pcre_utf8_support() 0 4 1
A str_isubstr_before_last_separator() 0 24 6
D str_truncate_safe() 0 78 18
A substr_right() 0 31 6
D str_split() 0 132 30
A str_ends_with_any() 0 13 4
A strrpos_in_byte() 0 12 4
A remove_right() 0 25 4
F strrpos() 0 119 25
A remove_html_breaks() 0 3 1
A showSupport() 0 8 2
A remove_invisible_characters() 0 9 1
A single_chr_html_encode() 0 18 4
A str_replace_last() 0 19 2
A str_iindex_last() 0 11 1
A str_substr_before_last_separator() 0 31 6
B strtocasefold() 0 33 7
A tabs_to_spaces() 0 11 3
B str_truncate() 0 44 7
D strripos() 0 96 19
A strpos_in_byte() 0 12 4
A str_ends_with() 0 11 3
A to_ascii() 0 6 1
A reduce_string_array() 0 29 6
B str_longest_common_prefix() 0 51 8
A str_pad_left() 0 12 1
A str_substr_first() 0 33 6
A remove_bom() 0 22 5
A str_repeat() 0 5 1
A strpbrk() 0 11 4
A whitespace_table() 0 3 1
A substr_count_simple() 0 31 6
A str_iends_with_any() 0 13 4
A str_substr_after_last_separator() 0 28 6
A str_isubstr_after_first_separator() 0 26 5
B str_snakeize() 0 55 6
A str_sort() 0 15 3
D to_utf8() 0 117 35
A ucword() 0 6 1
A str_underscored() 0 3 1
A str_offset_get() 0 14 4
A strip_whitespace() 0 7 2
A toAscii() 0 6 1
A str_ibegins() 0 3 1
A str_capitalize_name() 0 8 1
B str_limit_after_word() 0 55 11
A str_upper_first() 0 13 1
A normalizeEncoding() 0 3 1
A swapCase() 0 17 4
A substr_ileft() 0 15 4
B html_encode() 0 53 11
A str_dasherize() 0 3 1
A str_ensure_left() 0 11 3
B urldecode_fix_win1252_chars() 0 227 1
A toIso8859() 0 3 1
C ord() 0 72 16
A strtonatfold() 0 7 1
C strcspn() 0 52 12
A fixStrCaseHelper() 0 36 5
B str_split_pattern() 0 49 11
D strstr() 0 92 18
A str_isubstr_first() 0 25 4
A str_last_char() 0 16 4
A str_ireplace_beginning() 0 21 6
A str_substr_before_first_separator() 0 32 6
F substr() 0 143 32
A wordwrap_per_line() 0 28 5
A str_surround() 0 3 1
A strncmp() 0 19 4
A str_insert() 0 28 4
A utf8_fix_win1252_chars() 0 3 1
A replace_diamond_question_mark() 0 38 5
D is_utf8_string() 0 134 28
A to_utf8_convert_helper() 0 28 5
B str_delimit() 0 33 8
B strtoupper() 0 54 10
A min() 0 14 3
A str_starts_with_any() 0 17 5
B strrichr() 0 54 11
A split() 0 6 1
A str_istarts_with_any() 0 17 5
B str_contains_any() 0 29 8
A initEmojiData() 0 26 4
A remove_duplicates() 0 16 4
B str_slice() 0 33 10
F strpos() 0 131 27
A str_shuffle() 0 35 6
A strcmp() 0 9 2
A str_word_count() 0 23 5
A strripos_in_byte() 0 12 4
A str_to_binary() 0 10 2
A symfony_polyfill_used() 0 16 5
B str_to_words() 0 36 8

How to fix   Complexity   

Complex Class

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
final class UTF8
8
{
9
    /**
10
     * (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
11
     * This regular expression is a work around for http://bugs.exim.org/1279
12
     */
13
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
14
15
    /**
16
     * Bom => Byte-Length
17
     *
18
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
19
     *
20
     * @var array
21
     */
22
    private static $BOM = [
23
        "\xef\xbb\xbf"     => 3, // UTF-8 BOM
24
        ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
25
        "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
26
        '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
27
        "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
28
        'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
29
        "\xfe\xff"         => 2, // UTF-16 (BE) BOM
30
        'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
31
        "\xff\xfe"         => 2, // UTF-16 (LE) BOM
32
        'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
33
    ];
34
35
    /**
36
     * Numeric code point => UTF-8 Character
37
     *
38
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
39
     *
40
     * @var array
41
     */
42
    private static $WHITESPACE = [
43
        // NUL Byte
44
        0 => "\x0",
45
        // Tab
46
        9 => "\x9",
47
        // New Line
48
        10 => "\xa",
49
        // Vertical Tab
50
        11 => "\xb",
51
        // Carriage Return
52
        13 => "\xd",
53
        // Ordinary Space
54
        32 => "\x20",
55
        // NO-BREAK SPACE
56
        160 => "\xc2\xa0",
57
        // OGHAM SPACE MARK
58
        5760 => "\xe1\x9a\x80",
59
        // MONGOLIAN VOWEL SEPARATOR
60
        6158 => "\xe1\xa0\x8e",
61
        // EN QUAD
62
        8192 => "\xe2\x80\x80",
63
        // EM QUAD
64
        8193 => "\xe2\x80\x81",
65
        // EN SPACE
66
        8194 => "\xe2\x80\x82",
67
        // EM SPACE
68
        8195 => "\xe2\x80\x83",
69
        // THREE-PER-EM SPACE
70
        8196 => "\xe2\x80\x84",
71
        // FOUR-PER-EM SPACE
72
        8197 => "\xe2\x80\x85",
73
        // SIX-PER-EM SPACE
74
        8198 => "\xe2\x80\x86",
75
        // FIGURE SPACE
76
        8199 => "\xe2\x80\x87",
77
        // PUNCTUATION SPACE
78
        8200 => "\xe2\x80\x88",
79
        // THIN SPACE
80
        8201 => "\xe2\x80\x89",
81
        //HAIR SPACE
82
        8202 => "\xe2\x80\x8a",
83
        // LINE SEPARATOR
84
        8232 => "\xe2\x80\xa8",
85
        // PARAGRAPH SEPARATOR
86
        8233 => "\xe2\x80\xa9",
87
        // NARROW NO-BREAK SPACE
88
        8239 => "\xe2\x80\xaf",
89
        // MEDIUM MATHEMATICAL SPACE
90
        8287 => "\xe2\x81\x9f",
91
        // HALFWIDTH HANGUL FILLER
92
        65440 => "\xef\xbe\xa0",
93
        // IDEOGRAPHIC SPACE
94
        12288 => "\xe3\x80\x80",
95
    ];
96
97
    /**
98
     * @var array
99
     */
100
    private static $WHITESPACE_TABLE = [
101
        'SPACE'                     => "\x20",
102
        'NO-BREAK SPACE'            => "\xc2\xa0",
103
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
104
        'EN QUAD'                   => "\xe2\x80\x80",
105
        'EM QUAD'                   => "\xe2\x80\x81",
106
        'EN SPACE'                  => "\xe2\x80\x82",
107
        'EM SPACE'                  => "\xe2\x80\x83",
108
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
109
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
110
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
111
        'FIGURE SPACE'              => "\xe2\x80\x87",
112
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
113
        'THIN SPACE'                => "\xe2\x80\x89",
114
        'HAIR SPACE'                => "\xe2\x80\x8a",
115
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
116
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
117
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
118
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
119
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
120
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
121
        'HALFWIDTH HANGUL FILLER'   => "\xef\xbe\xa0",
122
    ];
123
124
    /**
125
     * @var array{upper: string[], lower: string[]}
126
     */
127
    private static $COMMON_CASE_FOLD = [
128
        'upper' => [
129
            'µ',
130
            'ſ',
131
            "\xCD\x85",
132
            'ς',
133
            'ẞ',
134
            "\xCF\x90",
135
            "\xCF\x91",
136
            "\xCF\x95",
137
            "\xCF\x96",
138
            "\xCF\xB0",
139
            "\xCF\xB1",
140
            "\xCF\xB5",
141
            "\xE1\xBA\x9B",
142
            "\xE1\xBE\xBE",
143
        ],
144
        'lower' => [
145
            'μ',
146
            's',
147
            'ι',
148
            'σ',
149
            'ß',
150
            'β',
151
            'θ',
152
            'φ',
153
            'π',
154
            'κ',
155
            'ρ',
156
            'ε',
157
            "\xE1\xB9\xA1",
158
            'ι',
159
        ],
160
    ];
161
162
    /**
163
     * @var array
164
     */
165
    private static $SUPPORT = [];
166
167
    /**
168
     * @var array|null
169
     */
170
    private static $BROKEN_UTF8_FIX;
171
172
    /**
173
     * @var array|null
174
     */
175
    private static $WIN1252_TO_UTF8;
176
177
    /**
178
     * @var array|null
179
     */
180
    private static $INTL_TRANSLITERATOR_LIST;
181
182
    /**
183
     * @var array|null
184
     */
185
    private static $ENCODINGS;
186
187
    /**
188
     * @var array|null
189
     */
190
    private static $ORD;
191
192
    /**
193
     * @var array|null
194
     */
195
    private static $EMOJI;
196
197
    /**
198
     * @var array|null
199
     */
200
    private static $EMOJI_VALUES_CACHE;
201
202
    /**
203
     * @var array|null
204
     */
205
    private static $EMOJI_KEYS_CACHE;
206
207
    /**
208
     * @var array|null
209
     */
210
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
211
212
    /**
213
     * @var array|null
214
     */
215
    private static $CHR;
216
217
    /**
     * Empty constructor: takes no arguments and performs no initialization.
     */
    public function __construct()
    {
        // intentionally left blank
    }
223
224
    /**
     * Fetch the single character located at a character offset, comparable to
     * what $str[1] does for single-byte strings.
     *
     * @param string $str      <p>The string to read from.</p>
     * @param int    $pos      <p>Character offset of the wanted character.</p>
     * @param string $encoding [optional] <p>Charset name; anything other than "UTF-8" is handed to UTF8::substr().</p>
     *
     * @return string the character at $pos, or an empty string if $str is empty or $pos is negative
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        // nothing to look up in an empty string or before its first character
        if ($pos < 0 || $str === '') {
            return '';
        }

        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $pos, 1)
            : self::substr($str, $pos, 1, $encoding);

        return (string) $char;
    }
245
246
    /**
     * Make sure the string starts with a BOM by prepending the UTF-8 BOM
     * when UTF8::string_has_bom() reports that none is present.
     *
     * INFO: A string that already carries a BOM is handed back unchanged.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the input string, prefixed with the UTF-8 BOM if it had none
     */
    public static function add_bom_to_string(string $str): string
    {
        if (self::string_has_bom($str) !== false) {
            return $str;
        }

        return self::bom() . $str;
    }
263
264
    /**
     * Changes the case of all keys in an array.
     *
     * Values are copied over untouched. If two keys become identical after the
     * case change, the later entry overwrites the earlier one.
     *
     * @param array  $array    <p>The array to work on</p>
     * @param int    $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                         or <strong>CASE_LOWER</strong> (default); any other value
     *                         is treated as <strong>CASE_LOWER</strong>.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return array
     *               <p>An array with its keys lower- or uppercased.</p>
     */
    public static function array_change_key_case(
        array $array,
        int $case = \CASE_LOWER,
        string $encoding = 'UTF-8'
    ): array {
        if (
            $case !== \CASE_LOWER
            &&
            $case !== \CASE_UPPER
        ) {
            // unknown flag: fall back to the default behavior
            $case = \CASE_LOWER;
        }

        $return = [];
        // iterate by value: nothing is written back into $array, and a by-reference
        // loop would leave a dangling reference behind after the loop
        foreach ($array as $key => $value) {
            $key = $case === \CASE_LOWER
                ? self::strtolower((string) $key, $encoding)
                : self::strtoupper((string) $key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
299
300
    /**
     * Extract the text enclosed by two delimiters.
     *
     * The search for $start begins at $offset; $end is then searched for
     * behind the located $start. An empty string is returned when either
     * delimiter is missing or when nothing lies between the two.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            // every other charset goes through the encoding-aware helpers of this class
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $start_at = self::strpos($str, $start, $offset, $encoding);
            if ($start_at === false) {
                return '';
            }

            $content_from = $start_at + (int) self::strlen($start, $encoding);
            $content_to = self::strpos($str, $end, $content_from, $encoding);
            if ($content_to === false || $content_to === $content_from) {
                return '';
            }

            return (string) self::substr(
                $str,
                $content_from,
                $content_to - $content_from,
                $encoding
            );
        }

        // UTF-8: call the "mb_" functions directly
        $start_at = \mb_strpos($str, $start, $offset);
        if ($start_at === false) {
            return '';
        }

        $content_from = $start_at + (int) \mb_strlen($start);
        $content_to = \mb_strpos($str, $end, $content_from);
        if ($content_to === false || $content_to === $content_from) {
            return '';
        }

        return (string) \mb_substr($str, $content_from, $content_to - $content_from);
    }
363
364
    /**
     * Turn a string of binary digits back into the bytes it represents.
     *
     * The digits are converted to hexadecimal via base_convert() and the
     * hexadecimal digits are then packed into a byte string.
     *
     * NOTE(review): the PHP manual warns that base_convert() may lose precision
     * on large numbers, so long inputs are presumably not converted exactly - confirm.
     *
     * @param mixed $bin 1|0
     *
     * @return string the packed bytes, or an empty string if $bin has no first
     *                element or converts to "0"
     */
    public static function binary_to_str($bin): string
    {
        if (isset($bin[0])) {
            $hex = \base_convert($bin, 2, 16);
            if ($hex !== '0') {
                return \pack('H*', $hex);
            }
        }

        return '';
    }
384
385
    /**
     * Provides the three bytes of the UTF-8 Byte Order Mark.
     *
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
     *
     * @return string UTF-8 Byte Order Mark
     */
    public static function bom(): string
    {
        $utf8_bom = "\xef\xbb\xbf";

        return $utf8_bom;
    }
396
397
    /**
     * Alias of UTF8::chr_map(): both arguments are forwarded unchanged.
     *
     * @param callable $callback
     * @param string   $str
     *
     * @return string[]
     *
     * @see UTF8::chr_map()
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
411
412
    /**
     * Look up one character by its index (the first character has index 0).
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8; other charsets are handled by UTF8::substr()</p>
     *
     * @return string the character at $index
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        $char = $encoding !== 'UTF-8'
            ? self::substr($str, $index, 1, $encoding)
            : \mb_substr($str, $index, 1);

        return (string) $char;
    }
429
430
    /**
     * Break the string up into its characters by delegating to UTF8::str_split().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string[] an array of chars
     */
    public static function chars(string $str): array
    {
        $characters = self::str_split($str);

        return $characters;
    }
441
442
    /**
     * Detect once which UTF-8 related extensions and features the server offers
     * and remember the findings in self::$SUPPORT.
     *
     * When mbstring or the symfony polyfill is found, the mbstring internal
     * encoding is switched to UTF-8 as part of the detection.
     *
     * @return true|null true on the run that performed the detection, null if it was already done
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        // the detection result is kept for the rest of the request
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
497
498
    /**
     * Generates a character from the given code point, UTF-8 encoded by default.
     *
     * INFO: opposite to UTF8::ord()
     *
     * Results are memoized per code point and encoding for the lifetime of the
     * request.
     *
     * @param int|string $code_point <p>The code point for which to generate a character,
     *                               as an integer or a numeric string.</p>
     * @param string     $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @return string|null multi-byte character, returns null on failure or empty input
     *                     (non-numeric input, or a value outside 0 - 0x10FFFF)
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // init
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // Reject everything that cannot be a Unicode code point up front, so that
        // empty / non-numeric / out-of-range input yields null instead of an
        // undefined-index warning from the lookup table below.
        if (
            \is_numeric($code_point) === false
            ||
            $code_point < 0
            ||
            $code_point > 0x10FFFF
        ) {
            return null;
        }

        // Normalize numeric strings to int: IntlChar::chr() interprets a string
        // argument as a character, not as a code point.
        $code_point = (int) $code_point;

        // the separator prevents two different code point / encoding pairs
        // from concatenating to the same key
        $cache_key = $code_point . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key]) === true) {
            return $CHAR_CACHE[$cache_key];
        }

        if ($code_point <= 0x7F) {
            // use "simple"-char only until "\x80"
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];
        } elseif (self::$SUPPORT['intlChar'] === true) {
            //
            // fallback via "IntlChar"
            //

            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);
        } else {
            //
            // fallback via vanilla php
            //

            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            if ($code_point <= 0x7FF) {
                // 2 bytes: 110xxxxx 10xxxxxx
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            } elseif ($code_point <= 0xFFFF) {
                // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                       self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            } else {
                // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                       self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                       self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            }
        }

        // a failed lookup must not be passed on to encode(), which expects a string
        if ($chr === null) {
            return null;
        }

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cache_key] = $chr;
    }
610
611
    /**
     * Run a callback on every character of a string and collect the results.
     *
     * The string is split with UTF8::str_split() and the pieces are passed to
     * the callback one at a time.
     *
     * @param callable $callback <p>The callback function.</p>
     * @param string   $str      <p>UTF-8 string to run callback on.</p>
     *
     * @return string[]
     *                  <p>The outcome of the callback, as array.</p>
     */
    public static function chr_map($callback, string $str): array
    {
        $characters = self::str_split($str);

        return \array_map($callback, $characters);
    }
627
628
    /**
     * Lists, for every character of a Unicode string, how many bytes it occupies.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @return int[] an array of byte lengths of each character (empty for an empty string)
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            $byte_length = static function (string $data): int {
                return \mb_strlen($data, 'CP850'); // 8-BIT
            };
        } else {
            $byte_length = '\strlen';
        }

        return \array_map($byte_length, self::str_split($str));
    }
658
659
    /**
     * Get a decimal code representation of a specific character.
     *
     * When iconv is available the character is converted to UCS-4LE and read as
     * one 32-bit integer; otherwise the UTF-8 bytes are decoded by hand.
     *
     * @param string $char <p>The input character.</p>
     *
     * @return int the code point of the character, 0 for an empty string
     */
    public static function chr_to_decimal(string $char): int
    {
        // An empty string has no first byte: without this guard unpack() fails on
        // the empty iconv() result and the "int" return type is violated.
        if ($char === '') {
            return 0;
        }

        if (self::$SUPPORT['iconv'] === true) {
            $chr_tmp = \iconv('UTF-8', 'UCS-4LE', $char);
            if ($chr_tmp !== false) {
                $unpacked = \unpack('V', $chr_tmp);
                // only trust the result if unpack() really produced a value,
                // otherwise fall through to the manual decoder
                if ($unpacked !== false && isset($unpacked[1])) {
                    return $unpacked[1];
                }
            }
        }

        $code = self::ord($char[0]);
        $bytes = 1;

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        // append the 6 payload bits of every continuation byte
        for ($i = 2; $i <= $bytes; ++$i) {
            // 10xxxxxx
            $code = ($code << 6) + (self::ord($char[$i - 1]) & ~0x80);
        }

        return $code;
    }
705
706
    /**
     * Get the hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * @param int|string $char   <p>The input character</p>
     * @param string     $prefix [optional] <p>Prefix handed to "UTF8::int_to_hex()".</p>
     *
     * @return string the code point encoded as U+xxxx, or an empty string for empty input
     */
    public static function chr_to_hex($char, string $prefix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        // the NUL entity is looked up as an empty string
        $input = $char === '&#0;' ? '' : (string) $char;

        return self::int_to_hex(self::ord($input), $prefix);
    }
726
727
    /**
     * Alias of "UTF8::chr_to_decimal()".
     *
     * @param string $chr <p>The input character.</p>
     *
     * @return int the value returned by "UTF8::chr_to_decimal()"
     *
     * @see UTF8::chr_to_decimal()
     * @deprecated <p>please use "UTF8::chr_to_decimal()"</p>
     */
    public static function chr_to_int(string $chr): int
    {
        $code_point = self::chr_to_decimal($chr);

        return $code_point;
    }
741
742
    /**
     * Splits a string into smaller chunks and joins them with the given line ending.
     *
     * The line ending is placed between the chunks, it is not appended after the last one.
     *
     * @param string $body         <p>The original string to be split.</p>
     * @param int    $chunk_length [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end          [optional] <p>The character(s) to be inserted between the chunks.</p>
     *
     * @return string the chunked string
     */
    public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunk_length);

        return \implode($end, $chunks);
    }
755
756
    /**
     * Accepts a string and removes all bytes that are not part of a UTF-8 byte sequence,
     * plus optional extra clean-up steps.
     *
     * The optional steps run in this order: diamond question mark, invisible characters,
     * whitespace, MS Word characters, BOM.
     *
     * @param string $str                                     <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                              [optional] <p>Set to true, if you need to remove
     *                                                        UTF-BOM.</p>
     * @param bool   $normalize_whitespace                    [optional] <p>Set to true, if you need to normalize
     *                                                        the whitespace.</p>
     * @param bool   $normalize_msword                        [optional] <p>Set to true, if you need to normalize
     *                                                        MS Word chars e.g.: "…" => "..."</p>
     * @param bool   $keep_non_breaking_space                 [optional] <p>Set to true, to keep
     *                                                        non-breaking-spaces, in combination with
     *                                                        $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark           [optional] <p>Set to true, if you need to remove
     *                                                        diamond question mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters             [optional] <p>Set to false, if you do not want to
     *                                                        remove invisible characters e.g.: "\0"</p>
     * @param bool   $remove_invisible_characters_url_encoded [optional] <p>Set to true, if you also want to
     *                                                        remove invisible url encoded characters
     *                                                        e.g.: "%0B" (only used together with
     *                                                        $remove_invisible_characters)<br>
     *                                                        WARNING: maybe contains false-positives
     *                                                        e.g. aa%0Baa -> aaaa.</p>
     *
     * @return string clean UTF-8 encoded string
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true,
        bool $remove_invisible_characters_url_encoded = false
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // group 1 captures runs of well-formed sequences, groups 2 + 3 match stray bytes;
        // replacing every match with "$1" therefore drops the stray bytes
        $byte_sequence_pattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        /** @noinspection NotOptimalRegularExpressionsInspection */
        $cleaned = (string) \preg_replace($byte_sequence_pattern, '$1', $str);

        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned, '');
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned, $remove_invisible_characters_url_encoded);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        return $remove_bom ? self::remove_bom($cleaned) : $cleaned;
    }
829
830
    /**
     * Clean-up a string so that only printable UTF-8 chars remain and fix the UTF-8 encoding.
     *
     * @param string $str <p>The input string (cast to string before processing).</p>
     *
     * @return string the cleaned string, or an empty string for empty input
     */
    public static function cleanup($str): string
    {
        $input = (string) $str;

        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $fixed = self::fix_simple_utf8($input);

        return self::clean(
            $fixed,
            true,  // remove BOM
            true,  // normalize whitespace chars ...
            false, // no MS Word normalization
            true,  // ... but keep non-breaking-spaces
            true,  // remove diamond question mark (�)
            true   // remove invisible characters (e.g. "\0")
        );
    }
864
865
    /**
     * Accepts a string or an array of strings and returns an array of Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * @param string|string[] $arg     <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $u_style <p>If True, will return code points in U+xxxx format,
     *                                 default, code points will be returned as integers.</p>
     *
     * @return array<int|string>
     *                           The array of code points:<br>
     *                           array<int> for $u_style === false<br>
     *                           array<string> for $u_style === true<br>
     *                           An empty array for empty or unsupported input.
     */
    public static function codepoints($arg, bool $u_style = false): array
    {
        // a string is first split into its characters, arrays are used as given
        $chars = \is_string($arg) ? self::str_split($arg) : $arg;

        /**
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_array($chars) || $chars === []) {
            return [];
        }

        $code_points = \array_map([self::class, 'ord'], $chars);

        if (!$u_style) {
            return $code_points;
        }

        return \array_map([self::class, 'int_to_hex'], $code_points);
    }
916
917
    /**
     * Trims the string and replaces every run of whitespace characters with a
     * single space. This includes tabs and newline characters, as well as
     * multibyte whitespace such as the thin space and ideographic space.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string with a trimmed $str and condensed whitespace
     */
    public static function collapse_whitespace(string $str): string
    {
        /** @noinspection PhpComposerExtensionStubsInspection */
        $collapsed = self::$SUPPORT['mbstring'] === true
            ? (string) \mb_ereg_replace('[[:space:]]+', ' ', $str)
            : self::regex_replace($str, '[[:space:]]+', ' ');

        return \trim($collapsed);
    }
935
936
    /**
     * Counts how often each character occurs in a string.
     *
     * @param string $str                     <p>The input string.</p>
     * @param bool   $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool   $try_to_use_mb_functions [optional] <p>Passed on to "UTF8::str_split()"; presumably
     *                                        set it to false, if you don't want to use "mb_" functions
     *                                        there.</p>
     *
     * @return int[] an associative array of Character as keys and
     *               their count as values
     */
    public static function count_chars(
        string $str,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // one array entry per character
        $chars = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

        return \array_count_values($chars);
    }
960
961
    /**
     * Remove css media-queries ("@media ... { ... }" blocks) from a string.
     *
     * @param string $str <p>The css input.</p>
     *
     * @return string the input without the matched media-query blocks
     */
    public static function css_stripe_media_queries(string $str): string
    {
        $media_query_pattern = '#@media\\s+(?:only\\s)?(?:[\\s{(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $stripped = \preg_replace($media_query_pattern, '', $str);

        return (string) $stripped;
    }
976
977
    /**
     * Checks whether the "ctype" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function ctype_loaded(): bool
    {
        $is_loaded = \extension_loaded('ctype');

        return $is_loaded;
    }
987
988
    /**
     * Converts an int value into a UTF-8 character.
     *
     * The value is wrapped into a numeric HTML entity ("&#<int>;") which is then decoded.
     *
     * @param mixed $int <p>The decimal code point.</p>
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        $entity = '&#' . $int . ';';

        return self::html_entity_decode($entity, \ENT_QUOTES | \ENT_HTML5);
    }
999
1000
    /**
     * Decodes a MIME header field.
     *
     * Uses iconv when it is available, otherwise falls back to "mb_decode_mimeheader()".
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      A decoded MIME field on success,
     *                      or false if an error occurs during the decoding
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (self::$SUPPORT['iconv'] === true) {
            return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
        }

        // fallback without iconv: non UTF-8 targets are passed through "UTF8::encode()" first
        $input = $encoding === 'UTF-8' ? $str : self::encode($encoding, $str);

        return \mb_decode_mimeheader($input);
    }
1026
1027
    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * @param string $str                            <p>The input string.</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               When <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode".</p>
     *
     * @return string
     */
    public static function emoji_decode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        self::initEmojiData();

        // pick the key list that matches the mapping used while encoding
        $encoded_keys = $use_reversible_string_mappings
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) $encoded_keys,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }
1057
1058
    /**
     * Encode a string with emoji chars into a non-emoji string.
     *
     * @param string $str                            <p>The input string</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               when <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode"</p>
     *
     * @return string
     */
    public static function emoji_encode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        self::initEmojiData();

        // pick the key list the emoji values are replaced with
        $replacement_keys = $use_reversible_string_mappings
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            (array) $replacement_keys,
            $str
        );
    }
1088
1089
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  This function will also try to fix broken / double encoding,
     *        so you can call this function also on a UTF-8 string and you don't mess up the string.
     *
     * Besides real charsets the pseudo encodings "JSON", "BASE64" and "HTML-ENTITIES" are
     * handled, both as target and as source encoding.
     *
     * @param string $to_encoding                   <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                           <p>The input string</p>
     * @param bool   $auto_detect_the_from_encoding [optional] <p>When true, the source encoding is detected
     *                                              via "UTF8::str_detect_encoding()" and a detected value
     *                                              replaces $from_encoding; if nothing can be detected the
     *                                              result of "UTF8::to_utf8()" is returned.<br>
     *                                              When false, $from_encoding is used as given.</p>
     * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                              A empty string will trigger the autodetect anyway.</p>
     *
     * @throws \InvalidArgumentException if the target is "JSON" and the string can not be json encoded
     *
     * @return string the converted string; an empty string if the source is "BASE64" and
     *                the input is not valid base64
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $to_encoding,
        string $str,
        bool $auto_detect_the_from_encoding = true,
        string $from_encoding = ''
    ): string {
        if ($str === '' || $to_encoding === '') {
            return $str;
        }

        if ($to_encoding !== 'UTF-8' && $to_encoding !== 'CP850') {
            $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
        }

        if ($from_encoding && $from_encoding !== 'UTF-8' && $from_encoding !== 'CP850') {
            $from_encoding = self::normalize_encoding($from_encoding, null);
        }

        // nothing to convert if source and target are known and identical
        if (
            $to_encoding
            &&
            $from_encoding
            &&
            $from_encoding === $to_encoding
        ) {
            return $str;
        }

        if ($to_encoding === 'JSON') {
            $return = self::json_encode($str);
            if ($return === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $return;
        }
        if ($from_encoding === 'JSON') {
            // NOTE(review): json_decode() may yield a non-string value - confirm how callers use this
            $str = self::json_decode($str);
            $from_encoding = '';
        }

        if ($to_encoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($from_encoding === 'BASE64') {
            $decoded = \base64_decode($str, true);
            if ($decoded === false) {
                // strict decoding failed: there is no payload to convert, and "false"
                // must not be handed to the string-typed helpers below
                return '';
            }

            $str = $decoded;
            $from_encoding = '';
        }

        if ($to_encoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true, 'UTF-8');
        }
        if ($from_encoding === 'HTML-ENTITIES') {
            $str = self::html_entity_decode($str, \ENT_COMPAT, 'UTF-8');
            $from_encoding = '';
        }

        $from_encoding_auto_detected = false;
        if (
            $auto_detect_the_from_encoding === true
            ||
            !$from_encoding
        ) {
            $from_encoding_auto_detected = self::str_detect_encoding($str);
        }

        if ($from_encoding_auto_detected !== false) {
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $from_encoding = $from_encoding_auto_detected;
        } elseif ($auto_detect_the_from_encoding === true) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (
            !$from_encoding
            ||
            $from_encoding === $to_encoding
        ) {
            return $str;
        }

        if (
            $to_encoding === 'UTF-8'
            &&
            (
                $from_encoding === 'WINDOWS-1252'
                ||
                $from_encoding === 'ISO-8859-1'
            )
        ) {
            return self::to_utf8($str);
        }

        if (
            $to_encoding === 'ISO-8859-1'
            &&
            (
                $from_encoding === 'WINDOWS-1252'
                ||
                $from_encoding === 'UTF-8'
            )
        ) {
            return self::to_iso8859($str);
        }

        if (
            $to_encoding !== 'UTF-8'
            &&
            $to_encoding !== 'ISO-8859-1'
            &&
            $to_encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // warning: do not use the symfony polyfill here
            $str_encoded = \mb_convert_encoding(
                $str,
                $to_encoding,
                $from_encoding
            );

            if ($str_encoded) {
                return $str_encoded;
            }
        }

        // last resort: iconv, and the unchanged input if that fails as well
        $return = \iconv($from_encoding, $to_encoding, $str);
        if ($return !== false) {
            return $return;
        }

        return $str;
    }
1248
1249
    /**
     * Encodes a string as a MIME header value via "iconv_mime_encode()" (with an empty field name).
     *
     * @param string $str
     * @param string $from_charset      [optional] <p>Set the input charset.</p>
     * @param string $to_charset        [optional] <p>Set the output charset.</p>
     * @param string $transfer_encoding [optional] <p>Set the transfer encoding.</p>
     * @param string $linefeed          [optional] <p>Set the used linefeed. The default (the literal
     *                                  characters "\r\n") is used as a real CR LF sequence.</p>
     * @param int    $indent            [optional] <p>Set the max length indent.</p>
     *
     * @return false|string
     *                      <p>An encoded MIME field on success,
     *                      or false if an error occurs during the encoding.</p>
     */
    public static function encode_mimeheader(
        $str,
        $from_charset = 'UTF-8',
        $to_charset = 'UTF-8',
        $transfer_encoding = 'Q',
        $linefeed = '\\r\\n',
        $indent = 76
    ) {
        // The default value is the four literal characters \r\n (single quoted), which would
        // end up verbatim in folded headers. The signature is kept for backward compatibility,
        // so translate that literal into the intended CR LF here.
        if ($linefeed === '\\r\\n') {
            $linefeed = "\r\n";
        }

        if ($from_charset !== 'UTF-8' && $from_charset !== 'CP850') {
            $from_charset = self::normalize_encoding($from_charset, 'UTF-8');
        }

        if ($to_charset !== 'UTF-8' && $to_charset !== 'CP850') {
            $to_charset = self::normalize_encoding($to_charset, 'UTF-8');
        }

        return \iconv_mime_encode(
            '',
            $str,
            [
                'scheme'           => $transfer_encoding,
                'line-length'      => $indent,
                'input-charset'    => $from_charset,
                'output-charset'   => $to_charset,
                'line-break-chars' => $linefeed,
            ]
        );
    }
1289
1290
    /**
     * Create an extract from a sentence, so if the search-string was found, it try to centered in the output.
     *
     * The text is only cut at a space or a dot, and every skipped part is replaced by
     * $replacer_for_skipped_text.
     *
     * @param string   $str                       <p>The input string.</p>
     * @param string   $search                    <p>The searched string.</p>
     * @param int|null $length                    [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
     * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the extract, or the unchanged input if no cut position was found
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacer_for_skipped_text = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // UTF-8 uses the native "mb_" functions directly, everything else the class wrappers
        $is_utf8 = $encoding === 'UTF-8';

        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2, 0);
        }

        if ($search === '') {
            // no search term: keep the beginning and cut at the first boundary after $length chars
            if ($length > 0) {
                $string_length = $is_utf8 ? (int) \mb_strlen($str) : (int) self::strlen($str, $encoding);
                $end = ($length - 1) > $string_length ? $string_length : ($length - 1);
            } else {
                $end = 0;
            }

            $pos = self::extract_text_boundary_pos($str, $end, $encoding);
            if (!$pos) {
                return $str;
            }

            $str_sub = $is_utf8 ? \mb_substr($str, 0, $pos) : self::substr($str, 0, $pos, $encoding);
            if ($str_sub === false) {
                return '';
            }

            return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
        }

        if ($is_utf8) {
            $word_position = (int) \mb_stripos($str, $search);
            $half_side = (int) ($word_position - $length / 2 + (int) \mb_strlen($search) / 2);
        } else {
            $word_position = (int) self::stripos($str, $search, 0, $encoding);
            $half_side = (int) ($word_position - $length / 2 + (int) self::strlen($search, $encoding) / 2);
        }

        // start of the extract: the last boundary in front of the centered window
        $pos_start = 0;
        if ($half_side > 0) {
            $half_text = $is_utf8
                ? \mb_substr($str, 0, $half_side)
                : self::substr($str, 0, $half_side, $encoding);

            if ($half_text !== false) {
                // max() ignores a "false" (not found) operand as long as the other one is a position
                if ($is_utf8) {
                    $pos_start = (int) \max(
                        \mb_strrpos($half_text, ' '),
                        \mb_strrpos($half_text, '.')
                    );
                } else {
                    $pos_start = (int) \max(
                        self::strrpos($half_text, ' ', 0, $encoding),
                        self::strrpos($half_text, '.', 0, $encoding)
                    );
                }
            }
        }

        if ($word_position && $half_side > 0) {
            $offset = $pos_start + $length - 1;
            $real_length = (int) self::strlen($str, $encoding);
            if ($offset > $real_length) {
                $offset = $real_length;
            }

            $pos_end = self::extract_text_boundary_pos($str, $offset, $encoding) - $pos_start;

            if ($pos_end <= 0) {
                // no boundary behind the window: keep everything up to the end of the string
                $str_sub = $is_utf8
                    ? \mb_substr($str, $pos_start, (int) \mb_strlen($str))
                    : self::substr($str, $pos_start, (int) self::strlen($str, $encoding), $encoding);

                if ($str_sub === false) {
                    return '';
                }

                return $replacer_for_skipped_text . \ltrim($str_sub, $trim_chars);
            }

            $str_sub = $is_utf8
                ? \mb_substr($str, $pos_start, $pos_end)
                : self::substr($str, $pos_start, $pos_end, $encoding);

            if ($str_sub === false) {
                return '';
            }

            return $replacer_for_skipped_text . \trim($str_sub, $trim_chars) . $replacer_for_skipped_text;
        }

        // the search term is at the very beginning, missing, or too close to the start
        // to be centered: keep the beginning of the text
        $offset = $length - 1;
        $true_length = (int) self::strlen($str, $encoding);
        if ($offset > $true_length) {
            $offset = $true_length;
        }

        $pos_end = self::extract_text_boundary_pos($str, $offset, $encoding);
        if (!$pos_end) {
            return $str;
        }

        $str_sub = $is_utf8 ? \mb_substr($str, 0, $pos_end) : self::substr($str, 0, $pos_end, $encoding);
        if ($str_sub === false) {
            return '';
        }

        return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
    }

    /**
     * Finds the first cut position (a space or a dot) at or after $offset.
     *
     * A plain min() over both lookups yields "false" as soon as one of the two characters
     * is missing, so only the positions that were actually found are compared here.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The character offset where the lookup starts.</p>
     * @param string $encoding <p>The already normalized encoding.</p>
     *
     * @return int the smallest position found, or 0 if neither character was found
     */
    private static function extract_text_boundary_pos(string $str, int $offset, string $encoding): int
    {
        $positions = [];

        foreach ([' ', '.'] as $boundary) {
            $pos = $encoding === 'UTF-8'
                ? \mb_strpos($str, $boundary, $offset)
                : self::strpos($str, $boundary, $offset, $encoding);

            if ($pos !== false) {
                $positions[] = (int) $pos;
            }
        }

        return $positions === [] ? 0 : \min($positions);
    }
1477
1478
    /**
1479
     * Reads entire file into a string.
1480
     *
1481
     * WARNING: Do not use UTF-8 Option ($convert_to_utf8) for binary files (e.g.: images) !!!
1482
     *
1483
     * @see http://php.net/manual/en/function.file-get-contents.php
1484
     *
1485
     * @param string        $filename         <p>
1486
     *                                        Name of the file to read.
1487
     *                                        </p>
1488
     * @param bool          $use_include_path [optional] <p>
1489
     *                                        Prior to PHP 5, this parameter is called
1490
     *                                        use_include_path and is a bool.
1491
     *                                        As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1492
     *                                        to trigger include path
1493
     *                                        search.
1494
     *                                        </p>
1495
     * @param resource|null $context          [optional] <p>
1496
     *                                        A valid context resource created with
1497
     *                                        stream_context_create. If you don't need to use a
1498
     *                                        custom context, you can skip this parameter by &null;.
1499
     *                                        </p>
1500
     * @param int|null      $offset           [optional] <p>
1501
     *                                        The offset where the reading starts.
1502
     *                                        </p>
1503
     * @param int|null      $max_length       [optional] <p>
1504
     *                                        Maximum length of data read. The default is to read until end
1505
     *                                        of file is reached.
1506
     *                                        </p>
1507
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
1508
     * @param bool          $convert_to_utf8  <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
1509
     *                                        some files, because they used non default utf-8 chars. Binary files
1510
     *                                        like images or pdf will not be converted.</p>
1511
     * @param string        $from_encoding    [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
1512
     *                                        An empty string will trigger the autodetect anyway.</p>
1513
     *
1514
     * @return false|string
1515
     *                      <p>The function returns the read data as string or <b>false</b> on failure.</p>
1516
     */
1517 12
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $max_length = null,
        int $timeout = 10,
        bool $convert_to_utf8 = true,
        string $from_encoding = ''
    ) {
        // Run the file name through the string sanitizer first; a failed
        // sanitization is reported exactly like a failed read.
        /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
        $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);
        if ($filename === false) {
            return false;
        }

        // Build a default stream context carrying the HTTP timeout, but only
        // when the caller supplied no context and the timeout is non-zero.
        if ($context === null && $timeout) {
            $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
        }

        // The native function wants an integer offset: null means "from the start".
        $offset = $offset ?? 0;

        // The length argument is only forwarded when the caller actually gave one.
        $data = \is_int($max_length)
            ? \file_get_contents($filename, $use_include_path, $context, $offset, $max_length)
            : \file_get_contents($filename, $use_include_path, $context, $offset);

        // return false on error
        if ($data === false) {
            return false;
        }

        if ($convert_to_utf8 !== true) {
            return $data;
        }

        // Conversion is skipped only when is_binary() reports true and neither
        // is_utf16() nor is_utf32() reports a match.
        $needs_conversion = self::is_binary($data, true) !== true
            || self::is_utf16($data, false) !== false
            || self::is_utf32($data, false) !== false;

        if ($needs_conversion) {
            $data = self::cleanup(
                self::encode('UTF-8', $data, false, $from_encoding)
            );
        }

        return $data;
    }
1574
1575
    /**
1576
     * Checks if a file starts with BOM (Byte Order Mark) character.
1577
     *
1578
     * @param string $file_path <p>Path to a valid file.</p>
1579
     *
1580
     * @throws \RuntimeException if file_get_contents() returned false
1581
     *
1582
     * @return bool
1583
     *              <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p>
1584
     */
1585 2
    public static function file_has_bom(string $file_path): bool
    {
        // Uses the native reader (not the UTF-8 converting wrapper of this class),
        // so the bytes are inspected exactly as stored.
        $raw_content = \file_get_contents($file_path);

        if ($raw_content !== false) {
            return self::string_has_bom($raw_content);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1594
1595
    /**
1596
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1597
     *
1598
     * @param mixed  $var
1599
     * @param int    $normalization_form
1600
     * @param string $leading_combining
1601
     *
1602
     * @return mixed
1603
     */
1604 62
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        switch (\gettype($var)) {
            case 'array':
            case 'object':
                // Containers are walked recursively; every element (or iterable
                // property) is replaced in place through the reference.
                /** @noinspection ForeachSourceInspection */
                foreach ($var as &$item) {
                    $item = self::filter($item, $normalization_form, $leading_combining);
                }
                unset($item);

                return $var;

            case 'string':
                if (\strpos($var, "\r") !== false) {
                    // Workaround https://bugs.php.net/65732
                    $var = self::normalize_line_ending($var);
                }

                // Strings reported as ASCII are returned without normalization.
                if (ASCII::is_ascii($var) !== false) {
                    return $var;
                }

                if (\Normalizer::isNormalized($var, $normalization_form)) {
                    // Non-empty marker: "nothing to normalize", keeps the
                    // leading-combining check below enabled.
                    $normalized = '-';
                } else {
                    $normalized = \Normalizer::normalize($var, $normalization_form);

                    // An empty / failed normalization result triggers a
                    // re-encode to UTF-8 instead.
                    $var = isset($normalized[0])
                        ? $normalized
                        : self::encode('UTF-8', $var, true);
                }

                if (
                    $var[0] >= "\x80"
                    &&
                    isset($normalized[0], $leading_combining[0])
                    &&
                    \preg_match('/^\\p{Mn}/u', $var)
                ) {
                    // Prevent leading combining chars
                    // for NFC-safe concatenations.
                    $var = $leading_combining . $var;
                }

                return $var;
        }

        // Every other type is passed through untouched.
        return $var;
    }
1664
1665
    /**
1666
     * "filter_input()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1667
     *
1668
     * Gets a specific external variable by name and optionally filters it
1669
     *
1670
     * @see http://php.net/manual/en/function.filter-input.php
1671
     *
1672
     * @param int    $type          <p>
1673
     *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1674
     *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1675
     *                              <b>INPUT_ENV</b>.
1676
     *                              </p>
1677
     * @param string $variable_name <p>
1678
     *                              Name of a variable to get.
1679
     *                              </p>
1680
     * @param int    $filter        [optional] <p>
1681
     *                              The ID of the filter to apply. The
1682
     *                              manual page lists the available filters.
1683
     *                              </p>
1684
     * @param mixed  $options       [optional] <p>
1685
     *                              Associative array of options or bitwise disjunction of flags. If filter
1686
     *                              accepts options, flags can be provided in "flags" field of array.
1687
     *                              </p>
1688
     *
1689
     * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
1690
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1691
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1692
     */
1693
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // Options are only forwarded when the caller passed a non-null value
        // as the fourth argument.
        $has_options = $options !== null && \func_num_args() >= 4;

        $raw_value = $has_options
            ? \filter_input($type, $variable_name, $filter, $options)
            : \filter_input($type, $variable_name, $filter);

        // Normalize whatever the native filter returned.
        return self::filter($raw_value);
    }
1707
1708
    /**
1709
     * "filter_input_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1710
     *
1711
     * Gets external variables and optionally filters them
1712
     *
1713
     * @see http://php.net/manual/en/function.filter-input-array.php
1714
     *
1715
     * @param int   $type       <p>
1716
     *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1717
     *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1718
     *                          <b>INPUT_ENV</b>.
1719
     *                          </p>
1720
     * @param mixed $definition [optional] <p>
1721
     *                          An array defining the arguments. A valid key is a string
1722
     *                          containing a variable name and a valid value is either a filter type, or an array
1723
     *                          optionally specifying the filter, flags and options. If the value is an
1724
     *                          array, valid keys are filter which specifies the
1725
     *                          filter type,
1726
     *                          flags which specifies any flags that apply to the
1727
     *                          filter, and options which specifies any options that
1728
     *                          apply to the filter. See the example below for a better understanding.
1729
     *                          </p>
1730
     *                          <p>
1731
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1732
     *                          input array are filtered by this filter.
1733
     *                          </p>
1734
     * @param bool  $add_empty  [optional] <p>
1735
     *                          Add missing keys as <b>NULL</b> to the return value.
1736
     *                          </p>
1737
     *
1738
     * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1739
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1740
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
1741
     *               is not set and <b>NULL</b> if the filter fails.
1742
     */
1743
    public static function filter_input_array(
        int $type,
        $definition = null,
        bool $add_empty = true
    ) {
        // A definition is only forwarded when the caller passed a non-null one;
        // otherwise the native function runs with its own defaults.
        $has_definition = $definition !== null && \func_num_args() >= 2;

        $raw_values = $has_definition
            ? \filter_input_array($type, $definition, $add_empty)
            : \filter_input_array($type);

        // Normalize the result (arrays are handled recursively by filter()).
        return self::filter($raw_values);
    }
1756
1757
    /**
1758
     * "filter_var()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1759
     *
1760
     * Filters a variable with a specified filter
1761
     *
1762
     * @see http://php.net/manual/en/function.filter-var.php
1763
     *
1764
     * @param mixed $variable <p>
1765
     *                        Value to filter.
1766
     *                        </p>
1767
     * @param int   $filter   [optional] <p>
1768
     *                        The ID of the filter to apply. The
1769
     *                        manual page lists the available filters.
1770
     *                        </p>
1771
     * @param mixed $options  [optional] <p>
1772
     *                        Associative array of options or bitwise disjunction of flags. If filter
1773
     *                        accepts options, flags can be provided in "flags" field of array. For
1774
     *                        the "callback" filter, callable type should be passed. The
1775
     *                        callback must accept one argument, the value to be filtered, and return
1776
     *                        the value after filtering/sanitizing it.
1777
     *                        </p>
1778
     *                        <p>
1779
     *                        <code>
1780
     *                        // for filters that accept options, use this format
1781
     *                        $options = array(
1782
     *                        'options' => array(
1783
     *                        'default' => 3, // value to return if the filter fails
1784
     *                        // other options here
1785
     *                        'min_range' => 0
1786
     *                        ),
1787
     *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1788
     *                        );
1789
     *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1790
     *                        // for filter that only accept flags, you can pass them directly
1791
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1792
     *                        // for filter that only accept flags, you can also pass as an array
1793
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1794
     *                        array('flags' => FILTER_NULL_ON_FAILURE));
1795
     *                        // callback validate filter
1796
     *                        function foo($value)
1797
     *                        {
1798
     *                        // Expected format: Surname, GivenNames
1799
     *                        if (strpos($value, ", ") === false) return false;
1800
     *                        list($surname, $givennames) = explode(", ", $value, 2);
1801
     *                        $empty = (empty($surname) || empty($givennames));
1802
     *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1803
     *                        if ($empty || $notstrings) {
1804
     *                        return false;
1805
     *                        } else {
1806
     *                        return $value;
1807
     *                        }
1808
     *                        }
1809
     *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1810
     *                        </code>
1811
     *                        </p>
1812
     *
1813
     * @return mixed the filtered data, or <b>FALSE</b> if the filter fails
1814
     */
1815 2
    public static function filter_var(
        $variable,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // null means "no options", whether it was omitted or passed explicitly.
        // It must not be forwarded: the native $options parameter is not nullable
        // and PHP 8.1+ reports a deprecation when null is passed to it.
        // This mirrors the null check done by filter_input() in this class.
        if ($options === null) {
            $variable = \filter_var($variable, $filter);
        } else {
            $variable = \filter_var($variable, $filter, $options);
        }

        // Normalize whatever the native filter returned.
        return self::filter($variable);
    }
1828
1829
    /**
1830
     * "filter_var_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1831
     *
1832
     * Gets multiple variables and optionally filters them
1833
     *
1834
     * @see http://php.net/manual/en/function.filter-var-array.php
1835
     *
1836
     * @param array $data       <p>
1837
     *                          An array with string keys containing the data to filter.
1838
     *                          </p>
1839
     * @param mixed $definition [optional] <p>
1840
     *                          An array defining the arguments. A valid key is a string
1841
     *                          containing a variable name and a valid value is either a
1842
     *                          filter type, or an
1843
     *                          array optionally specifying the filter, flags and options.
1844
     *                          If the value is an array, valid keys are filter
1845
     *                          which specifies the filter type,
1846
     *                          flags which specifies any flags that apply to the
1847
     *                          filter, and options which specifies any options that
1848
     *                          apply to the filter. See the example below for a better understanding.
1849
     *                          </p>
1850
     *                          <p>
1851
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1852
     *                          input array are filtered by this filter.
1853
     *                          </p>
1854
     * @param bool  $add_empty  [optional] <p>
1855
     *                          Add missing keys as <b>NULL</b> to the return value.
1856
     *                          </p>
1857
     *
1858
     * @return mixed an array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1859
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1860
     *               set
1861
     */
1862 2
    public static function filter_var_array(
        array $data,
        $definition = null,
        bool $add_empty = true
    ) {
        // null means "no definition", whether it was omitted or passed explicitly.
        // It is not a valid definition for the native function (PHP 8.1+ reports
        // a deprecation for it), so the native defaults are used instead - the
        // same way filter_input_array() in this class treats a null definition.
        if ($definition === null) {
            $a = \filter_var_array($data);
        } else {
            $a = \filter_var_array($data, $definition, $add_empty);
        }

        // Normalize the result (arrays are handled recursively by filter()).
        return self::filter($a);
    }
1875
1876
    /**
1877
     * Checks whether finfo is available on the server.
1878
     *
1879
     * @return bool
1880
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
1881
     */
1882
    public static function finfo_loaded(): bool
    {
        // Reports whether a class named "finfo" is declared (autoloading allowed,
        // which is also the default of class_exists()).
        $is_available = \class_exists('finfo', true);

        return $is_available;
    }
1886
1887
    /**
1888
     * Returns the first $n characters of the string.
1889
     *
1890
     * @param string $str      <p>The input string.</p>
1891
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
1892
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1893
     *
1894
     * @return string
1895
     */
1896 13
    public static function first_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to extract for a non-positive count or an empty input.
        if ($n <= 0 || $str === '') {
            return '';
        }

        // UTF-8 goes straight to mbstring; any other encoding is delegated to
        // the encoding-aware substr() of this class.
        $head = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        // Both helpers may signal failure with a non-string; the cast keeps the
        // declared string return type.
        return (string) $head;
    }
1911
1912
    /**
1913
     * Check if the number of Unicode characters isn't greater than the specified integer.
1914
     *
1915
     * @param string $str      the original string to be checked
1916
     * @param int    $box_size the size in number of chars to be checked against string
1917
     *
1918
     * @return bool true if string is less than or equal to $box_size, false otherwise
1919
     */
1920 2
    public static function fits_inside(string $str, int $box_size): bool
    {
        // Length as reported by the strlen() of this class, forced to int.
        $char_count = (int) self::strlen($str);

        return $box_size >= $char_count;
    }
1924
1925
    /**
1926
     * Try to fix simple broken UTF-8 strings.
1927
     *
1928
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1929
     *
1930
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1931
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1932
     * See: http://en.wikipedia.org/wiki/Windows-1252
1933
     *
1934
     * @param string $str <p>The input string</p>
1935
     *
1936
     * @return string
1937
     */
1938 47
    public static function fix_simple_utf8(string $str): string
    {
        // [search list, replacement list] - built on first use and reused by
        // every later call.
        static $replacement_lists = null;

        if ($str === '') {
            return '';
        }

        if ($replacement_lists === null) {
            // Load the fix table lazily; it is shared through the class property.
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $replacement_lists = [
                \array_keys(self::$BROKEN_UTF8_FIX),
                \array_values(self::$BROKEN_UTF8_FIX),
            ];
        }

        list($broken_sequences, $fixed_sequences) = $replacement_lists;

        return \str_replace($broken_sequences, $fixed_sequences, $str);
    }
1958
1959
    /**
1960
     * Fix a double (or multiple) encoded UTF8 string.
1961
     *
1962
     * @param string|string[] $str you can use a string or an array of strings
1963
     *
1964
     * @return string|string[]
1965
     *                         Will return the fixed input-"array" or
1966
     *                         the fixed input-"string"
1967
     *
1968
     * @psalm-suppress InvalidReturnType
1969
     */
1970 2
    public static function fix_utf8($str)
    {
        // Arrays are fixed element by element (recursively), in place.
        if (\is_array($str) === true) {
            foreach ($str as &$element) {
                $element = self::fix_utf8($element);
            }
            unset($element);

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $candidate = (string) $str;
        $previous = '';

        // Decode and re-encode repeatedly until a pass leaves the string
        // unchanged, i.e. no further encoding layer can be removed.
        while ($candidate !== $previous) {
            $previous = $candidate;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $decoded = self::utf8_decode($candidate, true);
            $candidate = self::to_utf8($decoded);
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $candidate;
    }
2001
2002
    /**
2003
     * Get the text direction of a specific character.
2004
     *
2005
     * @param string $char
2006
     *
2007
     * @return string 'RTL' or 'LTR'
2008
     */
2009 2
    public static function getCharDirection(string $char): string
    {
        // Code points that are right-to-left on their own (compared strictly).
        static $rtl_single_code_points = [
            0x5be, 0x5c0, 0x5c3, 0x5c6,
            0x608, 0x60b, 0x60d, 0x61b,
            0x710, 0x7b1, 0x7fa,
            0x81a, 0x824, 0x828, 0x85e,
            0x200f,
            0xfb1d, 0xfb3e,
            0x10808, 0x1083c, 0x1093f, 0x10a00,
        ];

        // Inclusive [first, last] right-to-left ranges, in ascending order.
        static $rtl_ranges = [
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            [0x800, 0x815],
            [0x830, 0x83e],
            [0x840, 0x858],
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            [0x10b78, 0x10b7f],
        ];

        // Preferred path: let IntlChar classify the character when it is available.
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $direction_code = \IntlChar::charDirection($char);

            // from "IntlChar"-Class
            $ltr_codes = [0, 11, 12, 20];
            $rtl_codes = [1, 13, 14, 15, 21];

            if (\in_array($direction_code, $ltr_codes, true)) {
                return 'LTR';
            }

            if (\in_array($direction_code, $rtl_codes, true)) {
                return 'RTL';
            }

            // Any other code falls through to the code-point tables.
        }

        $code_point = static::chr_to_decimal($char);

        // Fast exit: everything outside the span covered by the tables is LTR.
        if (!($code_point >= 0x5be && $code_point <= 0x10b7f)) {
            return 'LTR';
        }

        if (\in_array($code_point, $rtl_single_code_points, true)) {
            return 'RTL';
        }

        foreach ($rtl_ranges as list($first, $last)) {
            if ($code_point >= $first && $code_point <= $last) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2115
2116
    /**
2117
     * Check for php-support.
2118
     *
2119
     * @param string|null $key
2120
     *
2121
     * @return mixed
2122
     *               Return the full support-"array", if $key === null<br>
2123
     *               return bool-value, if $key is used and available<br>
2124
     *               otherwise return <strong>null</strong>
2125
     */
2126 27
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            // Load the transliterator list lazily, on the first keyed lookup.
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            // compatibility fix for old versions
            self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

            // Unknown keys (and keys holding null) yield null.
            return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
        }

        // No key: hand back the whole support array as it currently is.
        return self::$SUPPORT;
    }
2140
2141
    /**
2142
     * Warning: this method only works for some file-types (png, jpg)
2143
     *          if you need more supported types, please use e.g. "finfo"
2144
     *
2145
     * @param string $str
2146
     * @param array  $fallback <p>with these keys: 'ext', 'mime', 'type'</p>
2147
     *
2148
     * @return array<string, string|null>
2149
     *                       <p>with these keys: 'ext', 'mime', 'type'</p>
2150
     */
2151 39
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        //
        // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
        //
        // Known signatures, keyed by the first two raw bytes of the data:
        // "\xFF\xD8" are the byte values 255, 216 and "\x89\x50" are 137, 80.
        //
        // WARNING: do not add too simple comparisons, because of false-positive results
        // (written as the two byte values in decimal, concatenated):
        //
        // 3780 => 'pdf', 7790 => 'exe', 7784 => 'midi', 8075 => 'zip',
        // 8297 => 'rar', 7173 => 'gif', 7373 => 'tiff' 6677 => 'bmp', ...
        //
        static $signatures = [
            "\xFF\xD8" => [
                'ext'  => 'jpg',
                'mime' => 'image/jpeg',
                'type' => 'binary',
            ],
            "\x89\x50" => [
                'ext'  => 'png',
                'mime' => 'image/png',
                'type' => 'binary',
            ],
        ];

        if ($str === '') {
            return $fallback;
        }

        // Fewer than two bytes can never match a signature.
        $prefix = \substr($str, 0, 2);
        if ($prefix === false || \strlen($prefix) !== 2) {
            return $fallback;
        }

        return $signatures[$prefix] ?? $fallback;
    }
2214
2215
    /**
     * Build a string of randomly picked characters.
     *
     * Each character is drawn via "random_int()"; if that throws, "mt_rand()" is used instead.
     *
     * @param int    $length         <p>Number of characters to generate.</p>
     * @param string $possible_chars [optional] <p>Pool of characters to pick from.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The random string, or an empty string if the character pool is empty.</p>
     */
    public static function get_random_string(
        int $length,
        string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        string $encoding = 'UTF-8'
    ): string {
        // Decide on the code path BEFORE normalizing: only a literal "UTF-8" uses the plain "mb_*" calls.
        $use_native_mb = $encoding === 'UTF-8';
        if (!$use_native_mb) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $pool_size = $use_native_mb
            ? (int) \mb_strlen($possible_chars)
            : (int) self::strlen($possible_chars, $encoding);
        if ($pool_size === 0) {
            return '';
        }

        $result = '';
        $picked = 0;
        while ($picked < $length) {
            try {
                $index = \random_int(0, $pool_size - 1);
            } catch (\Exception $e) {
                /** @noinspection RandomApiMigrationInspection */
                $index = \mt_rand(0, $pool_size - 1);
            }

            $char = $use_native_mb
                ? \mb_substr($possible_chars, $index, 1)
                : self::substr($possible_chars, $index, 1, $encoding);

            // a failed extraction does not count towards the requested length
            if ($char === false) {
                continue;
            }

            $result .= $char;
            ++$picked;
        }

        return $result;
    }
2279
2280
    /**
     * Create a unique string from a random number, the session id, the client / server address
     * (if available in $_SERVER) and optional extra entropy, passed through "uniqid()".
     *
     * INFO: the md5-hash is only used to get a fixed-length identifier, do not treat the result as a secret.
     *
     * @param int|string $entropy_extra [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $use_md5       [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     */
    public static function get_unique_string($entropy_extra = '', bool $use_md5 = true): string
    {
        // "random_int()" throws if no source of randomness is available:
        // fall back to "mt_rand()" like "UTF8::get_random_string()" does, instead of failing.
        try {
            $rand_int = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            /** @noinspection RandomApiMigrationInspection */
            $rand_int = \mt_rand(0, \mt_getrandmax());
        }

        $unique_helper = $rand_int .
                         \session_id() .
                         ($_SERVER['REMOTE_ADDR'] ?? '') .
                         ($_SERVER['SERVER_ADDR'] ?? '') .
                         $entropy_extra;

        $unique_string = \uniqid($unique_helper, true);

        if ($use_md5) {
            $unique_string = \md5($unique_string . $unique_helper);
        }

        return $unique_string;
    }
2302
2303
    /**
     * Deprecated alias: forwards the call to "UTF8::string_has_bom()" and returns its result.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *
     * @see UTF8::string_has_bom()
     * @deprecated <p>please use "UTF8::string_has_bom()"</p>
     */
    public static function hasBom(string $str): bool
    {
        $has_bom = self::string_has_bom($str);

        return $has_bom;
    }
2317
2318
    /**
     * Check if the string contains at least one lower case character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>True if a lower case character was found, false otherwise.</p>
     */
    public static function has_lowercase(string $str): bool
    {
        // the leading ".*" allows the character class to match anywhere in the string
        $pattern = '.*[[:lower:]]';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2335
2336
    /**
     * Check if the string contains at least one whitespace character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>True if a whitespace character was found, false otherwise.</p>
     */
    public static function has_whitespace(string $str): bool
    {
        // the leading ".*" allows the character class to match anywhere in the string
        $pattern = '.*[[:space:]]';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2353
2354
    /**
     * Check if the string contains at least one upper case character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>True if an upper case character was found, false otherwise.</p>
     */
    public static function has_uppercase(string $str): bool
    {
        // the leading ".*" allows the character class to match anywhere in the string
        $pattern = '.*[[:upper:]]';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2370
2371
    /**
     * Convert a hexadecimal value into a character.
     *
     * The value is parsed with "hexdec()" and then handed over to "UTF8::decimal_to_chr()".
     *
     * @param string $hexdec <p>The hexadecimal value.</p>
     *
     * @return false|string one single UTF-8 character
     */
    public static function hex_to_chr(string $hexdec)
    {
        $code_point = \hexdec($hexdec);

        return self::decimal_to_chr($code_point);
    }
2382
2383
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * Accepted forms: 4 to 6 hexadecimal digits, optionally prefixed by "\u" or "U+",
     * e.g. "00a7", "\u00a7" or "U+00a7".
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * @param string $hexdec <p>The hexadecimal code point representation.</p>
     *
     * @return false|int the code point, or false on failure
     */
    public static function hex_to_int($hexdec)
    {
        // init
        $hexdec = (string) $hexdec;

        if ($hexdec === '') {
            return false;
        }

        // Only real hex digits are allowed: with a plain "a-z" class, input like "zzzz"
        // passed the check and "intval()" silently turned it into 0 instead of a failure.
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexdec, $match) === 1) {
            return \intval($match[1], 16);
        }

        return false;
    }
2407
2408
    /**
     * Deprecated alias: forwards all arguments to "UTF8::html_entity_decode()" and returns its result.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $flags    [optional] <p>Passed through unchanged, null by default.</p>
     * @param string $encoding [optional] <p>Passed through unchanged.</p>
     *
     * @return string
     *
     * @see UTF8::html_entity_decode()
     * @deprecated <p>please use "UTF8::html_entity_decode()"</p>
     */
    public static function html_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2427
2428
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * With mbstring available the work is done by "mb_encode_numericentity()", otherwise
     * every character is converted via "UTF8::single_chr_html_encode()".
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * @param string $str              <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keep_ascii_chars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(
        string $str,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // with "keep ascii" only code points >= 0x80 are converted
            $start_code = $keep_ascii_chars === true ? 0x80 : 0x00;

            // A convmap entry has exactly four values: [start, end, offset, mask].
            // The former fifth element was ignored by PHP 7 but makes PHP 8 throw a ValueError
            // ("must have a multiple of 4 elements").
            $convmap = [$start_code, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                /** @var false|string|null $return - needed for PhpStan (stubs error) */
                $return = \mb_encode_numericentity($str, $convmap);
                if ($return !== null && $return !== false) {
                    return $return;
                }
            }

            /** @var false|string|null $return - needed for PhpStan (stubs error) */
            $return = \mb_encode_numericentity($str, $convmap, $encoding);
            if ($return !== null && $return !== false) {
                return $return;
            }
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keep_ascii_chars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2495
2496
    /**
2497
     * UTF-8 version of html_entity_decode()
2498
     *
2499
     * The reason we are not using html_entity_decode() by itself is because
2500
     * while it is not technically correct to leave out the semicolon
2501
     * at the end of an entity most browsers will still interpret the entity
2502
     * correctly. html_entity_decode() does not convert entities without
2503
     * semicolons, so we are left with our own little solution here. Bummer.
2504
     *
2505
     * Convert all HTML entities to their applicable characters
2506
     *
2507
     * INFO: opposite to UTF8::html_encode()
2508
     *
2509
     * @see http://php.net/manual/en/function.html-entity-decode.php
2510
     *
2511
     * @param string $str      <p>
2512
     *                         The input string.
2513
     *                         </p>
2514
     * @param int    $flags    [optional] <p>
2515
     *                         A bitmask of one or more of the following flags, which specify how to handle quotes
2516
     *                         and which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2517
     *                         <table>
2518
     *                         Available <i>flags</i> constants
2519
     *                         <tr valign="top">
2520
     *                         <td>Constant Name</td>
2521
     *                         <td>Description</td>
2522
     *                         </tr>
2523
     *                         <tr valign="top">
2524
     *                         <td><b>ENT_COMPAT</b></td>
2525
     *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2526
     *                         </tr>
2527
     *                         <tr valign="top">
2528
     *                         <td><b>ENT_QUOTES</b></td>
2529
     *                         <td>Will convert both double and single quotes.</td>
2530
     *                         </tr>
2531
     *                         <tr valign="top">
2532
     *                         <td><b>ENT_NOQUOTES</b></td>
2533
     *                         <td>Will leave both double and single quotes unconverted.</td>
2534
     *                         </tr>
2535
     *                         <tr valign="top">
2536
     *                         <td><b>ENT_HTML401</b></td>
2537
     *                         <td>
2538
     *                         Handle code as HTML 4.01.
2539
     *                         </td>
2540
     *                         </tr>
2541
     *                         <tr valign="top">
2542
     *                         <td><b>ENT_XML1</b></td>
2543
     *                         <td>
2544
     *                         Handle code as XML 1.
2545
     *                         </td>
2546
     *                         </tr>
2547
     *                         <tr valign="top">
2548
     *                         <td><b>ENT_XHTML</b></td>
2549
     *                         <td>
2550
     *                         Handle code as XHTML.
2551
     *                         </td>
2552
     *                         </tr>
2553
     *                         <tr valign="top">
2554
     *                         <td><b>ENT_HTML5</b></td>
2555
     *                         <td>
2556
     *                         Handle code as HTML 5.
2557
     *                         </td>
2558
     *                         </tr>
2559
     *                         </table>
2560
     *                         </p>
2561
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2562
     *
2563
     * @return string the decoded string
2564
     */
2565 51
    public static function html_entity_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to decode: fewer than four bytes (examples: &; || &x;) or no "&" at all.
        if (!isset($str[3]) || \strpos($str, '&') === false) {
            return $str;
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        // Warn if the encoding is none of the three listed here and mbstring is missing.
        $is_listed_encoding = $encoding === 'UTF-8'
                              || $encoding === 'ISO-8859-1'
                              || $encoding === 'WINDOWS-1252';
        if (!$is_listed_encoding && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // Decode repeatedly until a pass changes nothing, so nested entities
        // like "&amp;lt;" are resolved completely.
        do {
            $previous = $str;

            if (\strpos($str, '&') === false) {
                break;
            }

            if (\strpos($str, '&#') !== false) {
                // decode also numeric & UTF16 two byte entities:
                // append the missing ";" so that "html_entity_decode()" accepts them
                $str = (string) \preg_replace(
                    '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                    '$1;',
                    $str
                );
            }

            $str = \html_entity_decode($str, $flags, $encoding);
        } while ($previous !== $str);

        return $str;
    }
2621
2622
    /**
     * Escape a string for html output via "UTF8::htmlspecialchars()",
     * using the flags ENT_QUOTES | ENT_SUBSTITUTE.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }
2638
2639
    /**
     * Remove empty html-tag.
     *
     * e.g.: <tag></tag>
     *
     * An opening tag directly followed (optionally separated by whitespace) by a closing tag
     * is removed. If the regex fails, e.g. for invalid UTF-8 input, the string is returned unchanged.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function html_stripe_empty_tags(string $str): string
    {
        $stripped = \preg_replace(
            '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u',
            '',
            $str
        );

        // "preg_replace()" returns null on error (the "u" modifier rejects invalid UTF-8):
        // keep the input instead of silently turning it into an empty string.
        if ($stripped === null) {
            return $str;
        }

        return $stripped;
    }
2656
2657
    /**
2658
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2659
     *
2660
     * @see http://php.net/manual/en/function.htmlentities.php
2661
     *
2662
     * @param string $str           <p>
2663
     *                              The input string.
2664
     *                              </p>
2665
     * @param int    $flags         [optional] <p>
2666
     *                              A bitmask of one or more of the following flags, which specify how to handle
2667
     *                              quotes, invalid code unit sequences and the used document type. The default is
2668
     *                              ENT_COMPAT | ENT_HTML401.
2669
     *                              <table>
2670
     *                              Available <i>flags</i> constants
2671
     *                              <tr valign="top">
2672
     *                              <td>Constant Name</td>
2673
     *                              <td>Description</td>
2674
     *                              </tr>
2675
     *                              <tr valign="top">
2676
     *                              <td><b>ENT_COMPAT</b></td>
2677
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2678
     *                              </tr>
2679
     *                              <tr valign="top">
2680
     *                              <td><b>ENT_QUOTES</b></td>
2681
     *                              <td>Will convert both double and single quotes.</td>
2682
     *                              </tr>
2683
     *                              <tr valign="top">
2684
     *                              <td><b>ENT_NOQUOTES</b></td>
2685
     *                              <td>Will leave both double and single quotes unconverted.</td>
2686
     *                              </tr>
2687
     *                              <tr valign="top">
2688
     *                              <td><b>ENT_IGNORE</b></td>
2689
     *                              <td>
2690
     *                              Silently discard invalid code unit sequences instead of returning
2691
     *                              an empty string. Using this flag is discouraged as it
2692
     *                              may have security implications.
2693
     *                              </td>
2694
     *                              </tr>
2695
     *                              <tr valign="top">
2696
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2697
     *                              <td>
2698
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2699
     *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty
2700
     *                              string.
2701
     *                              </td>
2702
     *                              </tr>
2703
     *                              <tr valign="top">
2704
     *                              <td><b>ENT_DISALLOWED</b></td>
2705
     *                              <td>
2706
     *                              Replace invalid code points for the given document type with a
2707
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2708
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2709
     *                              instance, to ensure the well-formedness of XML documents with
2710
     *                              embedded external content.
2711
     *                              </td>
2712
     *                              </tr>
2713
     *                              <tr valign="top">
2714
     *                              <td><b>ENT_HTML401</b></td>
2715
     *                              <td>
2716
     *                              Handle code as HTML 4.01.
2717
     *                              </td>
2718
     *                              </tr>
2719
     *                              <tr valign="top">
2720
     *                              <td><b>ENT_XML1</b></td>
2721
     *                              <td>
2722
     *                              Handle code as XML 1.
2723
     *                              </td>
2724
     *                              </tr>
2725
     *                              <tr valign="top">
2726
     *                              <td><b>ENT_XHTML</b></td>
2727
     *                              <td>
2728
     *                              Handle code as XHTML.
2729
     *                              </td>
2730
     *                              </tr>
2731
     *                              <tr valign="top">
2732
     *                              <td><b>ENT_HTML5</b></td>
2733
     *                              <td>
2734
     *                              Handle code as HTML 5.
2735
     *                              </td>
2736
     *                              </tr>
2737
     *                              </table>
2738
     *                              </p>
2739
     * @param string $encoding      [optional] <p>
2740
     *                              Like <b>htmlspecialchars</b>,
2741
     *                              <b>htmlentities</b> takes an optional third argument
2742
     *                              <i>encoding</i> which defines encoding used in
2743
     *                              conversion.
2744
     *                              Although this argument is technically optional, you are highly
2745
     *                              encouraged to specify the correct value for your code.
2746
     *                              </p>
2747
     * @param bool   $double_encode [optional] <p>
2748
     *                              When <i>double_encode</i> is turned off PHP will not
2749
     *                              encode existing html entities. The default is to convert everything.
2750
     *                              </p>
2751
     *
2752
     * @return string
2753
     *                <p>
2754
     *                The encoded string.
2755
     *                <br><br>
2756
     *                If the input <i>string</i> contains an invalid code unit
2757
     *                sequence within the given <i>encoding</i> an empty string
2758
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2759
     *                <b>ENT_SUBSTITUTE</b> flags are set.
2760
     *                </p>
2761
     */
2762 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // step 1: the native conversion
        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        /**
         * step 2: PHP does not convert a backslash into its html entity, so it is
         * replaced here by its numeric entity.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace('\\', '&#92;', $encoded);

        // step 3: hand the result to "UTF8::html_encode()" with "keep ascii chars" enabled
        return self::html_encode($encoded, true, $encoding);
    }
2791
2792
    /**
2793
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2794
     *
2795
     * INFO: Take a look at "UTF8::htmlentities()"
2796
     *
2797
     * @see http://php.net/manual/en/function.htmlspecialchars.php
2798
     *
2799
     * @param string $str           <p>
2800
     *                              The string being converted.
2801
     *                              </p>
2802
     * @param int    $flags         [optional] <p>
2803
     *                              A bitmask of one or more of the following flags, which specify how to handle
2804
     *                              quotes, invalid code unit sequences and the used document type. The default is
2805
     *                              ENT_COMPAT | ENT_HTML401.
2806
     *                              <table>
2807
     *                              Available <i>flags</i> constants
2808
     *                              <tr valign="top">
2809
     *                              <td>Constant Name</td>
2810
     *                              <td>Description</td>
2811
     *                              </tr>
2812
     *                              <tr valign="top">
2813
     *                              <td><b>ENT_COMPAT</b></td>
2814
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2815
     *                              </tr>
2816
     *                              <tr valign="top">
2817
     *                              <td><b>ENT_QUOTES</b></td>
2818
     *                              <td>Will convert both double and single quotes.</td>
2819
     *                              </tr>
2820
     *                              <tr valign="top">
2821
     *                              <td><b>ENT_NOQUOTES</b></td>
2822
     *                              <td>Will leave both double and single quotes unconverted.</td>
2823
     *                              </tr>
2824
     *                              <tr valign="top">
2825
     *                              <td><b>ENT_IGNORE</b></td>
2826
     *                              <td>
2827
     *                              Silently discard invalid code unit sequences instead of returning
2828
     *                              an empty string. Using this flag is discouraged as it
2829
     *                              may have security implications.
2830
     *                              </td>
2831
     *                              </tr>
2832
     *                              <tr valign="top">
2833
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2834
     *                              <td>
2835
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2836
     *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty
2837
     *                              string.
2838
     *                              </td>
2839
     *                              </tr>
2840
     *                              <tr valign="top">
2841
     *                              <td><b>ENT_DISALLOWED</b></td>
2842
     *                              <td>
2843
     *                              Replace invalid code points for the given document type with a
2844
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2845
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2846
     *                              instance, to ensure the well-formedness of XML documents with
2847
     *                              embedded external content.
2848
     *                              </td>
2849
     *                              </tr>
2850
     *                              <tr valign="top">
2851
     *                              <td><b>ENT_HTML401</b></td>
2852
     *                              <td>
2853
     *                              Handle code as HTML 4.01.
2854
     *                              </td>
2855
     *                              </tr>
2856
     *                              <tr valign="top">
2857
     *                              <td><b>ENT_XML1</b></td>
2858
     *                              <td>
2859
     *                              Handle code as XML 1.
2860
     *                              </td>
2861
     *                              </tr>
2862
     *                              <tr valign="top">
2863
     *                              <td><b>ENT_XHTML</b></td>
2864
     *                              <td>
2865
     *                              Handle code as XHTML.
2866
     *                              </td>
2867
     *                              </tr>
2868
     *                              <tr valign="top">
2869
     *                              <td><b>ENT_HTML5</b></td>
2870
     *                              <td>
2871
     *                              Handle code as HTML 5.
2872
     *                              </td>
2873
     *                              </tr>
2874
     *                              </table>
2875
     *                              </p>
2876
     * @param string $encoding      [optional] <p>
2877
     *                              Defines encoding used in conversion.
2878
     *                              </p>
2879
     *                              <p>
2880
     *                              For the purposes of this function, the encodings
2881
     *                              ISO-8859-1, ISO-8859-15,
2882
     *                              UTF-8, cp866,
2883
     *                              cp1251, cp1252, and
2884
     *                              KOI8-R are effectively equivalent, provided the
2885
     *                              <i>string</i> itself is valid for the encoding, as
2886
     *                              the characters affected by <b>htmlspecialchars</b> occupy
2887
     *                              the same positions in all of these encodings.
2888
     *                              </p>
2889
     * @param bool   $double_encode [optional] <p>
2890
     *                              When <i>double_encode</i> is turned off PHP will not
2891
     *                              encode existing html entities, the default is to convert everything.
2892
     *                              </p>
2893
     *
2894
     * @return string
     *                <p>
     *                The converted string.
     *                <br><br>
     *                If the input <i>string</i> contains an invalid code unit
     *                sequence within the given <i>encoding</i> an empty string
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
     *                <b>ENT_SUBSTITUTE</b> flags are set.
     *                </p>
2901
     */
2902 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are handed to PHP untouched, every other value
        // goes through "normalize_encoding()" first (second argument: 'UTF-8').
        $is_known_encoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        return \htmlspecialchars($str, $flags, $encoding, $double_encode);
    }
2919
2920
    /**
     * Tells whether the "iconv" extension is loaded in the running PHP process.
     *
     * @return bool
     *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise
     */
    public static function iconv_loaded(): bool
    {
        $extension = 'iconv';

        return \extension_loaded($extension);
    }
2930
2931
    /**
     * Deprecated alias, forwards the call to "UTF8::decimal_to_chr()".
     *
     * @param mixed $int <p>Passed through unchanged.</p>
     *
     * @return string
     *
     * @see UTF8::decimal_to_chr()
     * @deprecated <p>please use "UTF8::decimal_to_chr()"</p>
     */
    public static function int_to_chr($int): string
    {
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
2945
2946
    /**
     * Converts an integer into its hexadecimal "U+xxxx" code point notation.
     *
     * INFO: opposite to UTF8::hex_to_int()
     *
     * @param int    $int    <p>The integer to be converted to hexadecimal code point.</p>
     * @param string $prefix [optional] <p>Text placed in front of the hex digits, "U+" by default.</p>
     *
     * @return string
     *                <p>The prefix followed by the hex digits, left-padded with "0" to at least four digits.</p>
     */
    public static function int_to_hex(int $int, string $prefix = 'U+'): string
    {
        // values with fewer than four hex digits are zero-padded on the left,
        // longer values are kept as they are
        $hex_digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $prefix . $hex_digits;
    }
2964
2965
    /**
     * Tells whether the "IntlChar" class is available.
     *
     * @return bool
     *              <strong>true</strong> if the class exists, <strong>false</strong> otherwise
     */
    public static function intlChar_loaded(): bool
    {
        $class_name = 'IntlChar';

        return \class_exists($class_name);
    }
2975
2976
    /**
     * Tells whether the "intl" extension is loaded in the running PHP process.
     *
     * @return bool
     *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise
     */
    public static function intl_loaded(): bool
    {
        $extension = 'intl';

        return \extension_loaded($extension);
    }
2986
2987
    /**
     * Deprecated alias for "UTF8::is_ascii()", delegates to "ASCII::is_ascii()".
     *
     * @param string $str <p>The string to check.</p>
     *
     * @return bool
     *
     * @see UTF8::is_ascii()
     * @deprecated <p>please use "UTF8::is_ascii()"</p>
     */
    public static function isAscii(string $str): bool
    {
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
3001
3002
    /**
     * Deprecated alias, forwards the call to "UTF8::is_base64()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *
     * @see UTF8::is_base64()
     * @deprecated <p>please use "UTF8::is_base64()"</p>
     */
    public static function isBase64($str): bool
    {
        $is_base64 = self::is_base64($str);

        return $is_base64;
    }
3016
3017
    /**
     * Deprecated alias, forwards both arguments to "UTF8::is_binary()".
     *
     * @param mixed $str    <p>The input to check.</p>
     * @param bool  $strict <p>Passed through as the "$strict" argument.</p>
     *
     * @return bool
     *
     * @see UTF8::is_binary()
     * @deprecated <p>please use "UTF8::is_binary()"</p>
     */
    public static function isBinary($str, $strict = false): bool
    {
        $is_binary = self::is_binary($str, $strict);

        return $is_binary;
    }
3032
3033
    /**
     * Deprecated alias, forwards the call to "UTF8::is_bom()".
     *
     * @param string $utf8_chr <p>The input string.</p>
     *
     * @return bool
     *
     * @see UTF8::is_bom()
     * @deprecated <p>please use "UTF8::is_bom()"</p>
     */
    public static function isBom(string $utf8_chr): bool
    {
        $is_bom = self::is_bom($utf8_chr);

        return $is_bom;
    }
3047
3048
    /**
     * Deprecated alias, forwards the call to "UTF8::is_html()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *
     * @see UTF8::is_html()
     * @deprecated <p>please use "UTF8::is_html()"</p>
     */
    public static function isHtml(string $str): bool
    {
        $is_html = self::is_html($str);

        return $is_html;
    }
3062
3063
    /**
     * Deprecated alias, forwards the call to "UTF8::is_json()"
     * (the second argument of "is_json()" keeps its default value).
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *
     * @see UTF8::is_json()
     * @deprecated <p>please use "UTF8::is_json()"</p>
     */
    public static function isJson(string $str): bool
    {
        $is_json = self::is_json($str);

        return $is_json;
    }
3077
3078
    /**
     * Deprecated alias, forwards the call to "UTF8::is_utf16()".
     *
     * @param mixed $str <p>The input string.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     *
     * @see UTF8::is_utf16()
     * @deprecated <p>please use "UTF8::is_utf16()"</p>
     */
    public static function isUtf16($str)
    {
        $result = self::is_utf16($str);

        return $result;
    }
3095
3096
    /**
     * Deprecated alias, forwards the call to "UTF8::is_utf32()".
     *
     * @param mixed $str <p>The input string.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     *
     * @see UTF8::is_utf32()
     * @deprecated <p>please use "UTF8::is_utf32()"</p>
     */
    public static function isUtf32($str)
    {
        $result = self::is_utf32($str);

        return $result;
    }
3113
3114
    /**
     * Deprecated alias, forwards both arguments to "UTF8::is_utf8()".
     *
     * @param string $str    <p>The input to be checked.</p>
     * @param bool   $strict <p>Passed through as the "$strict" argument.</p>
     *
     * @return bool
     *
     * @see UTF8::is_utf8()
     * @deprecated <p>please use "UTF8::is_utf8()"</p>
     */
    public static function isUtf8($str, $strict = false): bool
    {
        $is_utf8 = self::is_utf8($str, $strict);

        return $is_utf8;
    }
3129
3130
    /**
     * Checks the whole string against the POSIX class "[:alpha:]".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not $str contains only alphabetic chars (an empty string matches as well).</p>
     */
    public static function is_alpha(string $str): bool
    {
        $pattern = '^[[:alpha:]]*$';

        // without mbstring the class-internal pattern matcher is used
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3147
3148
    /**
     * Checks the whole string against the POSIX class "[:alnum:]".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not $str contains only alphanumeric chars (an empty string matches as well).</p>
     */
    public static function is_alphanumeric(string $str): bool
    {
        $pattern = '^[[:alnum:]]*$';

        // without mbstring the class-internal pattern matcher is used
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3165
3166
    /**
     * Checks if a string is 7 bit ASCII (delegates to "ASCII::is_ascii()").
     *
     * @param string $str <p>The string to check.</p>
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if it is ASCII<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function is_ascii(string $str): bool
    {
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
3181
3182
    /**
     * Checks whether the input is a base64 encoded string.
     *
     * The input is decoded in strict mode and encoded again; it only counts
     * as base64 when that round trip reproduces the input exactly.
     *
     * @param mixed|string $str                   <p>The input string (every non-string yields false).</p>
     * @param bool         $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p>
     *
     * @return bool whether or not $str is base64 encoded
     */
    public static function is_base64($str, $empty_string_is_valid = false): bool
    {
        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (\is_string($str) === false) {
            return false;
        }

        if ($str === '' && $empty_string_is_valid === false) {
            return false;
        }

        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        return \base64_encode($decoded) === $str;
    }
3211
3212
    /**
     * Heuristic check whether the input looks like binary data.
     *
     * The checks run in this order, the first hit returns true:
     * 1. the string consists of the characters "0" and "1" only
     * 2. "get_file_type()" reports the type "binary"
     * 3. more than 25% of the bytes are NUL bytes
     * 4. (strict mode only) fileinfo reports the mime encoding "binary"
     *
     * @param mixed $input  <p>The input, it is cast to string first; an empty string is never binary.</p>
     * @param bool  $strict <p>Also ask ext-fileinfo for the mime encoding.</p>
     *
     * @throws \RuntimeException if $strict is used and fileinfo-support is not available
     *
     * @return bool
     */
    public static function is_binary($input, bool $strict = false): bool
    {
        $input = (string) $input;
        if ($input === '') {
            return false;
        }

        if (\preg_match('~^[01]+$~', $input)) {
            return true;
        }

        $file_type = self::get_file_type($input);
        if ($file_type['type'] === 'binary') {
            return true;
        }

        // ratio of NUL bytes compared to the total byte length
        $byte_length = \strlen($input);
        $null_byte_count = \substr_count($input, "\x0", 0, $byte_length);
        if (($null_byte_count / $byte_length) > 0.25) {
            return true;
        }

        if ($strict !== true) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $finfo_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

        return $finfo_encoding && $finfo_encoding === 'binary';
    }
3256
3257
    /**
     * Checks whether a file looks binary, judged by its first 512 bytes.
     *
     * The block that was read is passed to "UTF8::is_binary()" in strict mode.
     *
     * @param string $file <p>The file name, it is opened via "fopen()" in "rb" mode.</p>
     *
     * @return bool
     *              <p><strong>false</strong> if the file could not be opened or nothing was read.</p>
     */
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (\is_resource($handle) === false) {
            return false;
        }

        $first_block = \fread($handle, 512);
        \fclose($handle);

        if ($first_block === '') {
            return false;
        }

        return self::is_binary($first_block, true);
    }
3281
3282
    /**
     * Checks the whole string against the POSIX class "[:space:]".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not $str contains only whitespace characters (an empty string matches as well).</p>
     */
    public static function is_blank(string $str): bool
    {
        $pattern = '^[[:space:]]*$';

        // without mbstring the class-internal pattern matcher is used
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3299
3300
    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p><strong>true</strong> if $str is identical (===) to one of the keys of the BOM map,
     *              <strong>false</strong> otherwise.</p>
     */
    public static function is_bom($str): bool
    {
        // Only the keys of self::$BOM are compared, so there is no need to
        // iterate the static array by reference.
        return \in_array($str, \array_keys(self::$BOM), true);
    }
3321
3322
    /**
     * Determine whether the given value is considered to be empty.
     *
     * This is a thin wrapper around PHP's "empty()" language construct, so
     * every value that equals FALSE (e.g. "", "0", 0, null, []) counts as empty.
     *
     * @param mixed $str
     *
     * @return bool whether or not $str is empty()
     */
    public static function is_empty($str): bool
    {
        $is_empty = empty($str);

        return $is_empty;
    }
3336
3337
    /**
     * Checks the whole string against the POSIX class "[:xdigit:]".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not $str contains only hexadecimal chars (an empty string matches as well).</p>
     */
    public static function is_hexadecimal(string $str): bool
    {
        $pattern = '^[[:xdigit:]]*$';

        // without mbstring the class-internal pattern matcher is used
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3354
3355
    /**
     * Check if the string contains at least one HTML tag.
     *
     * The string is passed through "emoji_encode()" before the tag pattern is applied.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not $str contains html elements.</p>
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        $encoded = self::emoji_encode($str); // hack for emoji support :/

        // "preg_match()" returns 1 only when a tag was found, 0 / false otherwise
        $tag_found = \preg_match(
            "/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u",
            $encoded
        );

        return $tag_found === 1;
    }
3378
3379
    /**
     * Try to check if "$str" is a JSON-string.
     *
     * @param string $str                                    <p>The input string.</p>
     * @param bool   $only_array_or_object_results_are_valid [optional] <p>Only array and objects are valid json results.</p>
     *
     * @throws \RuntimeException if json-support is not available
     *
     * @return bool
     *              <p>Whether or not the $str is in JSON format.</p>
     */
    public static function is_json(
        string $str,
        $only_array_or_object_results_are_valid = true
    ): bool {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // a decoded "null" is only accepted if the input itself was the
        // literal "null" (compared case-insensitively)
        $input_is_null_literal = (\strtoupper($str) === 'NULL');
        if ($decoded === null && !$input_is_null_literal) {
            return false;
        }

        if ($only_array_or_object_results_are_valid === true) {
            $is_array_or_object = \is_object($decoded) || \is_array($decoded);
            if (!$is_array_or_object) {
                return false;
            }
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3418
3419
    /**
     * Checks the whole string against the POSIX class "[:lower:]".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not $str contains only lowercase chars (an empty string matches as well).</p>
     */
    public static function is_lowercase(string $str): bool
    {
        $pattern = '^[[:lower:]]*$';

        // without mbstring the class-internal pattern matcher is used
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3434
3435
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The check works by actually unserializing the input. Class
     * instantiation is disabled for that ("allowed_classes" => false), so
     * probing an untrusted string does not create objects of real classes.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not $str is serialized.</p>
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // "b:0;" is the serialized form of false and can not be told apart
        // from a failed "unserialize()" call, so it is handled up front.
        if ($str === 'b:0;') {
            return true;
        }

        // The silence operator hides the notice/warning PHP raises for
        // strings that are not unserializable.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3455
3456
    /**
     * Checks the whole string against the POSIX class "[:upper:]".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not $str contains only upper case characters (an empty string matches as well).</p>
     */
    public static function is_uppercase(string $str): bool
    {
        $pattern = '^[[:upper:]]*$';

        // without mbstring the class-internal pattern matcher is used
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3474
3475
    /**
     * Check if the string is UTF-16.
     *
     * For both byte orders the (BOM-free) input is converted to UTF-8, back
     * to UTF-16 and to UTF-8 again. If that round trip is stable, the chars
     * of the result that also occur in the input are counted. The byte order
     * with the higher count wins, equal counts mean "not UTF-16".
     *
     * @param mixed $str                       <p>The input string.</p>
     * @param bool  $check_if_string_is_binary <p>Return false right away if "is_binary()" (strict) does not
     *                                         report the input as binary.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        if (
            $check_if_string_is_binary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // Counts the chars for one byte order. "$str_chars" is shared by
        // reference so the chars of the input are only collected once.
        $count_matching_chars = static function (string $encoding) use ($str, &$str_chars): int {
            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                return 0;
            }

            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                return 0;
            }

            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // Only the keys of the result are needed. It is a temporary
            // value, so it is iterated by value (not by reference).
            $matches = 0;
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true) === true) {
                    ++$matches;
                }
            }

            return $matches;
        };

        $maybe_utf16le = $count_matching_chars('UTF-16LE');
        $maybe_utf16be = $count_matching_chars('UTF-16BE');

        if ($maybe_utf16be === $maybe_utf16le) {
            return false;
        }

        return $maybe_utf16le > $maybe_utf16be ? 1 : 2;
    }
3555
3556
    /**
     * Check if the string is UTF-32.
     *
     * For both byte orders the (BOM-free) input is converted to UTF-8, back
     * to UTF-32 and to UTF-8 again. If that round trip is stable, the chars
     * of the result that also occur in the input are counted. The byte order
     * with the higher count wins, equal counts mean "not UTF-32".
     *
     * @param mixed $str                       <p>The input string.</p>
     * @param bool  $check_if_string_is_binary <p>Return false right away if "is_binary()" (strict) does not
     *                                         report the input as binary.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        if (
            $check_if_string_is_binary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // Counts the chars for one byte order. "$str_chars" is shared by
        // reference so the chars of the input are only collected once.
        $count_matching_chars = static function (string $encoding) use ($str, &$str_chars): int {
            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                return 0;
            }

            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                return 0;
            }

            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // Only the keys of the result are needed. It is a temporary
            // value, so it is iterated by value (not by reference).
            $matches = 0;
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true) === true) {
                    ++$matches;
                }
            }

            return $matches;
        };

        $maybe_utf32le = $count_matching_chars('UTF-32LE');
        $maybe_utf32be = $count_matching_chars('UTF-32BE');

        if ($maybe_utf32be === $maybe_utf32le) {
            return false;
        }

        return $maybe_utf32le > $maybe_utf32be ? 1 : 2;
    }
3636
3637
    /**
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
     *
     * Arrays are checked element by element (recursively); the first element that fails
     * the check makes the whole result <strong>false</strong>, an empty array yields
     * <strong>true</strong>. Every other input is cast to string and handed to
     * "UTF8::is_utf8_string()".
     *
     * @param int|string|string[]|null $str    <p>The input to be checked.</p>
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str) === true) {
            // iterate by value: the elements are only read here, so a by-reference
            // loop is unnecessary and would leave a dangling reference behind
            foreach ($str as $v) {
                if (self::is_utf8($v, $strict) === false) {
                    return false;
                }
            }

            return true;
        }

        return self::is_utf8_string((string) $str, $strict);
    }
3659
3660
    /**
     * Decodes a JSON string, after passing it through "UTF8::filter()".
     *
     * Thin wrapper around the native "json_decode()" function.
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>The <i>json</i> string being decoded.</p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>User specified recursion depth.</p>
     * @param int    $options [optional] <p>Bitmask of JSON decode options, forwarded unchanged.</p>
     *
     * @throws \RuntimeException if the support table reports that ext-json is missing
     *
     * @return mixed
     *               <p>Whatever the native "json_decode()" returns for the filtered input.</p>
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        // the input is filtered first, even if the extension check below fails
        $filtered_json = self::filter($json);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_decode($filtered_json, $assoc, $depth, $options);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
3710
3711
    /**
     * Returns the JSON representation of a value, after passing it through "UTF8::filter()".
     *
     * Thin wrapper around the native "json_encode()" function.
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>The <i>value</i> being encoded.</p>
     * @param int   $options [optional] <p>
     *                       Bitmask of JSON encode options (e.g. <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_UNICODE</b>), forwarded unchanged.
     *                       </p>
     * @param int   $depth   [optional] <p>Set the maximum depth. Must be greater than zero.</p>
     *
     * @throws \RuntimeException if the support table reports that ext-json is missing
     *
     * @return false|string
     *                      <p>Whatever the native "json_encode()" returns for the filtered value.</p>
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        // the value is filtered first, even if the extension check below fails
        $filtered_value = self::filter($value);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_encode($filtered_value, $options, $depth);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
3760
3761
    /**
     * Checks whether JSON is available on the server.
     *
     * The check is done by probing for the "json_decode()" function.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function json_loaded(): bool
    {
        $has_json_decode = \function_exists('json_decode');

        return $has_json_decode;
    }
3771
3772
    /**
     * Makes string's first char lowercase.
     *
     * The string is split into its first character and the remainder; only the first
     * character is lower-cased, then both parts are glued together again.
     *
     * @param string      $str                           <p>The input string</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        // any encoding name other than the exact "UTF-8" spelling is normalized
        // first and then handled by the encoding-aware helpers of this class
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $remainder = (string) self::substr($str, 1, null, $encoding);
            $first_char = (string) self::substr($str, 0, 1, $encoding);

            return self::strtolower(
                $first_char,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            ) . $remainder;
        }

        $remainder = (string) \mb_substr($str, 1);
        $first_char = (string) \mb_substr($str, 0, 1);

        // without language or length constraints the native mbstring function is used
        if ($lang === null && $try_to_keep_the_string_length === false) {
            return \mb_strtolower($first_char) . $remainder;
        }

        return self::strtolower(
            $first_char,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        ) . $remainder;
    }
3828
3829
    /**
     * Deprecated alias: forwards all arguments unchanged to "UTF8::lcfirst()".
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @return string
     *
     * @see UTF8::lcfirst()
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $result = self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
3858
3859
    /**
     * Lowercase for all words in the string.
     *
     * The input is split via "UTF8::str_to_words()"; every non-empty piece that is not
     * listed in $exceptions gets its first character lower-cased via "UTF8::lcfirst()",
     * and the pieces are concatenated again in their original order.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string[]    $exceptions                    [optional] <p>Exclusion for some words.</p>
     * @param string      $char_list                     [optional] <p>Additional chars that belong to words and do not start
     *                                                   a new word.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // explicit comparison: a truthiness check would also discard the string "0"
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        // by-value iteration: the pieces are only read, no reference is needed
        foreach ($words as $word) {
            // skip only really empty pieces, a piece like "0" must be kept
            if ((string) $word === '') {
                continue;
            }

            if (
                $use_exceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words_str .= self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
            } else {
                $words_str .= $word;
            }
        }

        return $words_str;
    }
3908
3909
    /**
     * Deprecated alias: forwards all arguments unchanged to "UTF8::lcfirst()".
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @return string
     *
     * @see UTF8::lcfirst()
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $result = self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
3938
3939
    /**
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
     *
     * If a non-empty $chars list is given, exactly these characters are stripped,
     * otherwise leading whitespace ("\s") is stripped. With mbstring support the work
     * is done by "mb_ereg_replace()", without it by "UTF8::regex_replace()".
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped</p>
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // compare explicitly: a plain truthiness check would treat the
        // char list "0" like "no chars given" and strip whitespace instead
        $has_chars = $chars !== null && $chars !== '';

        if (self::$SUPPORT['mbstring'] === true) {
            if ($has_chars) {
                // no delimiter is passed here, the mb_ereg pattern below has none
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                $pattern = "^[{$chars}]+";
            } else {
                $pattern = '^[\\s]+';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        if ($has_chars) {
            // "/" is quoted as well, it is handed over as delimiter below
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+";
        } else {
            $pattern = '^[\\s]+';
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
3975
3976
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * An array input is joined into one string before its code points are collected.
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @return string|null the character with the highest code point, or null if no code points were found
     */
    public static function max($arg)
    {
        $str = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($str, false);

        return $codepoints === [] ? null : self::chr(\max($codepoints));
    }
3998
3999
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @return int
     *             <p>The largest entry of "UTF8::chr_size_list()", or 0 if that list is empty.</p>
     */
    public static function max_chr_width(string $str): int
    {
        $char_sizes = self::chr_size_list($str);

        if ($char_sizes === []) {
            return 0;
        }

        return (int) \max($char_sizes);
    }
4017
4018
    /**
     * Checks whether mbstring is available on the server.
     *
     * The check asks PHP whether the "mbstring" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function mbstring_loaded(): bool
    {
        $is_loaded = \extension_loaded('mbstring');

        return $is_loaded;
    }
4028
4029
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * An array input is joined into one string before its code points are collected.
     *
     * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @return string|null the character with the lowest code point, or null if no code points were found
     */
    public static function min($arg)
    {
        $str = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($str, false);
        if ($codepoints !== []) {
            return self::chr(\min($codepoints));
        }

        return null;
    }
4051
4052
    /**
     * Deprecated alias: forwards both arguments unchanged to "UTF8::normalize_encoding()".
     *
     * @param mixed $encoding
     * @param mixed $fallback
     *
     * @return mixed
     *
     * @see UTF8::normalize_encoding()
     * @deprecated <p>please use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4067
4068
    /**
     * Normalize the encoding-"name" input.
     *
     * Resolution order: empty input -> $fallback; a handful of exact spellings that are
     * answered directly; the per-request result cache; the list of known encodings
     * (exact match); finally an alias table that is looked up with the upper-cased name
     * reduced to its letters and digits. Names that match nothing are returned upper-cased.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        static $RESOLVED_CACHE = [];

        // exact spellings that are answered without any further lookup
        $direct_hits = [
            'UTF-8'         => 'UTF-8',
            'UTF8'          => 'UTF-8',
            '8BIT'          => 'CP850',
            'BINARY'        => 'CP850',
            'HTML'          => 'HTML-ENTITIES',
            'HTML-ENTITIES' => 'HTML-ENTITIES',
            'ISO'           => 'ISO-8859-1',
            'ISO-8859-1'    => 'ISO-8859-1',
        ];

        $name = (string) $encoding;

        if (!$name) {
            return $fallback;
        }

        if (isset($direct_hits[$name])) {
            return $direct_hits[$name];
        }

        // only a fallback, for non "strict_types" usage ...
        if ($name === '1' || $name === '0') {
            return $fallback;
        }

        if (isset($RESOLVED_CACHE[$name])) {
            return $RESOLVED_CACHE[$name];
        }

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // already a known encoding name: keep it exactly as given
        if (\in_array($name, self::$ENCODINGS, true)) {
            $RESOLVED_CACHE[$name] = $name;

            return $name;
        }

        $upper_name = \strtoupper($name);
        // alias keys contain letters and digits only
        $alias_key = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $upper_name);

        $aliases = [
            // ISO-8859-1 / Western European
            'ISO8859'   => 'ISO-8859-1',
            'ISO88591'  => 'ISO-8859-1',
            'ISO'       => 'ISO-8859-1',
            'LATIN'     => 'ISO-8859-1',
            'LATIN1'    => 'ISO-8859-1',
            // ISO-8859-2 / Central European
            'ISO88592'  => 'ISO-8859-2',
            'LATIN2'    => 'ISO-8859-2',
            // ISO-8859-3 / Southern European
            'ISO88593'  => 'ISO-8859-3',
            'LATIN3'    => 'ISO-8859-3',
            // ISO-8859-4 / Northern European
            'ISO88594'  => 'ISO-8859-4',
            'LATIN4'    => 'ISO-8859-4',
            // ISO-8859-5 / Cyrillic
            'ISO88595'  => 'ISO-8859-5',
            // ISO-8859-6 / Arabic
            'ISO88596'  => 'ISO-8859-6',
            // ISO-8859-7 / Greek
            'ISO88597'  => 'ISO-8859-7',
            // ISO-8859-8 / Hebrew
            'ISO88598'  => 'ISO-8859-8',
            // ISO-8859-9 / Turkish
            'ISO88599'  => 'ISO-8859-9',
            'LATIN5'    => 'ISO-8859-9',
            // ISO-8859-11 / Thai
            'ISO885911' => 'ISO-8859-11',
            'TIS620'    => 'ISO-8859-11',
            // ISO-8859-10 / Nordic
            'ISO885910' => 'ISO-8859-10',
            'LATIN6'    => 'ISO-8859-10',
            // ISO-8859-13 / Baltic
            'ISO885913' => 'ISO-8859-13',
            'LATIN7'    => 'ISO-8859-13',
            // ISO-8859-14 / Celtic
            'ISO885914' => 'ISO-8859-14',
            'LATIN8'    => 'ISO-8859-14',
            // ISO-8859-15 / Western European (with some extra chars e.g. €)
            'ISO885915' => 'ISO-8859-15',
            'LATIN9'    => 'ISO-8859-15',
            // ISO-8859-16 / Southeast European
            'ISO885916' => 'ISO-8859-16',
            'LATIN10'   => 'ISO-8859-16',
            // Unicode transformation formats
            'UTF16'     => 'UTF-16',
            'UTF32'     => 'UTF-32',
            'UTF8'      => 'UTF-8',
            'UTF'       => 'UTF-8',
            'UTF7'      => 'UTF-7',
            // raw bytes
            '8BIT'      => 'CP850',
            'BINARY'    => 'CP850',
        ];

        // Windows code pages 1250 - 1258 are known under three spellings each
        for ($code_page = 1250; $code_page <= 1258; ++$code_page) {
            $target = 'WINDOWS-' . $code_page;

            $aliases['CP' . $code_page] = $target;
            $aliases['WIN' . $code_page] = $target;
            $aliases['WINDOWS' . $code_page] = $target;
        }

        // unknown names fall through as their upper-cased spelling
        $normalized = $aliases[$alias_key] ?? $upper_name;

        // the cache is keyed by the spelling the caller used
        $RESOLVED_CACHE[$name] = $normalized;

        return $normalized;
    }
4219
4220
    /**
     * Standardize line endings: every "\r\n", "\r" and "\n" is replaced by $replacer.
     *
     * The replacement is done in a single pass, so the inserted replacer is never
     * scanned again. This matters for replacers that contain line-ending chars
     * themselves, e.g. "\r\n".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $replacer <p>The replacer char e.g. "\n" (Linux) or "\r\n" (Windows). You can also use \PHP_EOL here.</p>
     *
     * @return string
     *                <p>A string with normalized line ending.</p>
     */
    public static function normalize_line_ending(string $str, $replacer = "\n"): string
    {
        // array replacers were never documented; keep the previous "str_replace()"
        // behavior for them instead of guessing what the caller wants
        if (\is_array($replacer) === true) {
            return \str_replace(["\r\n", "\r", "\n"], $replacer, $str);
        }

        $replacer = (string) $replacer;

        // "strtr()" tries the longest key first and does not re-scan replaced text,
        // "str_replace()" with a search-array would replace its own output again
        return \strtr(
            $str,
            [
                "\r\n" => $replacer,
                "\r"   => $replacer,
                "\n"   => $replacer,
            ]
        );
    }
4233
4234
    /**
     * Normalize some MS Word special characters.
     *
     * Delegates to "ASCII::normalize_msword()".
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @return string
     *                <p>A string with normalized characters for commonly used chars in Word documents.</p>
     */
    public static function normalize_msword(string $str): string
    {
        $normalized = ASCII::normalize_msword($str);

        return $normalized;
    }
4246
4247
    /**
     * Normalize the whitespace.
     *
     * Delegates to "ASCII::normalize_whitespace()" with the same arguments.
     *
     * @param string $str                        <p>The string to be normalized.</p>
     * @param bool   $keep_non_breaking_space    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keep_bidi_unicode_controls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                           bidirectional text chars.</p>
     *
     * @return string
     *                <p>A string with normalized whitespace.</p>
     */
    public static function normalize_whitespace(
        string $str,
        bool $keep_non_breaking_space = false,
        bool $keep_bidi_unicode_controls = false
    ): string {
        $normalized = ASCII::normalize_whitespace($str, $keep_non_breaking_space, $keep_bidi_unicode_controls);

        return $normalized;
    }
4269
4270
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * Lookup order: per-request result cache, the bundled "ord" data table,
     * "IntlChar::ord()" (if available), and finally a manual decode of the first
     * (up to four) bytes.
     *
     * @param string $chr      <p>The character of which to calculate code point.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <p>Unicode code point of the given character,<br>
     *             0 if there are no bytes to decode</p>
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        static $ORD_CACHE = [];

        $chr = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // the key is built from the original input, before any re-encoding below
        $cache_key = $chr . $encoding;
        if (isset($ORD_CACHE[$cache_key]) === true) {
            return $ORD_CACHE[$cache_key];
        }

        // check again, if it's still not UTF-8
        // NOTE(review): "UTF8::encode()" presumably converts $chr for the lookups below - confirm
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            $code = self::$ORD[$chr];
            $ORD_CACHE[$cache_key] = $code;

            return $code;
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_code = \IntlChar::ord($chr);
            // falsy results (null / 0) are not trusted, the manual decode takes over
            if ($intl_code) {
                $ORD_CACHE[$cache_key] = $intl_code;

                return $intl_code;
            }
        }

        //
        // fallback via vanilla php
        //

        // 1-based list of the first (up to) four byte values
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        /** @noinspection OffsetOperationsInspection */
        $lead = $bytes ? $bytes[1] : 0;

        /** @noinspection OffsetOperationsInspection */
        if ($lead >= 0xF0 && isset($bytes[4])) {
            // four byte sequence
            $code = (int) ((($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        } elseif ($lead >= 0xE0 && isset($bytes[3])) {
            // three byte sequence
            $code = (int) ((($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        } elseif ($lead >= 0xC0 && isset($bytes[2])) {
            // two byte sequence
            $code = (int) ((($lead - 0xC0) << 6) + $bytes[2] - 0x80);
        } else {
            // single byte, or a lead byte without enough following bytes
            $code = $lead;
        }

        $ORD_CACHE[$cache_key] = $code;

        return $code;
    }
4355
4356
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
     *          if the second parameter is not set!
     *
     * Uses "mb_parse_str()" if mbstring support is reported, the native "parse_str()" otherwise.
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $result     <p>The result will be returned into this reference parameter.</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return bool
     *              <p>Will return <strong>false</strong> if php can't parse the string or if $result ends up empty.</p>
     */
    public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
    {
        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        if (self::$SUPPORT['mbstring'] !== true) {
            /** @noinspection PhpVoidFunctionResultUsedInspection */
            \parse_str($str, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($str, $result);
        if ($parsed === false) {
            return false;
        }

        return $result !== [];
    }
4388
4389
    /**
     * Checks if the "u" modifier is available that enables Unicode support in PCRE.
     *
     * An empty pattern with the "u" modifier is matched against an empty string;
     * errors of that probe are silenced on purpose.
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function pcre_utf8_support(): bool
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $probe_result = @\preg_match('//u', '');

        return (bool) $probe_result;
    }
4403
4404
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * The boundaries are interpreted in this order:
     * decimal digits (ctype), hexadecimal digits (ctype), numeric value (only if $use_ctype is false),
     * otherwise the value is passed to "UTF8::ord()".
     *
     * @param mixed     $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param mixed     $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     * @param bool      $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple "is_numeric"</p>
     * @param string    $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param float|int $step      [optional] <p>
     *                             If a step value is given, it will be used as the
     *                             increment between elements in the sequence. step
     *                             should be given as a positive number. If not specified,
     *                             step will default to 1.
     *                             </p>
     *
     * @throws \InvalidArgumentException if $step is not numeric or not greater than zero
     * @throws \RuntimeException         if $use_ctype is true but "ext-ctype" is not available
     *
     * @return string[]
     *                  <p>The characters of the range, or an empty array if a boundary is empty / resolves to 0.</p>
     */
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            /**
             * @psalm-suppress RedundantConditionGivenDocblockType
             */
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            /**
             * @psalm-suppress RedundantConditionGivenDocblockType - false-positive from psalm?
             */
            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        $is_digit = false;
        $is_xdigit = false;

        // The ctype functions only behave predictably for strings: integers between -128 and 255 are
        // interpreted as ASCII codes (and non-string arguments are deprecated since PHP 8.1),
        // so both checks work on the string representation of the boundaries.
        /** @noinspection PhpComposerExtensionStubsInspection */
        if ($use_ctype && \ctype_digit((string) $var1) && \ctype_digit((string) $var2)) {
            $is_digit = true;
            $start = (int) $var1;
        } /** @noinspection PhpComposerExtensionStubsInspection */ elseif ($use_ctype && \ctype_xdigit((string) $var1) && \ctype_xdigit((string) $var2)) {
            $is_xdigit = true;
            $start = (int) self::hex_to_int((string) $var1);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            $start = self::ord($var1);
        }

        if (!$start) {
            return [];
        }

        // the end boundary is always interpreted the same way as the start boundary
        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int((string) $var2);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord($var2);
        }

        if (!$end) {
            return [];
        }

        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            // \range() may return floats for a float $step, so cast back to an integer code point
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
4492
4493
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible, i.e. until the string does not change anymore.</p>
     *
     * @return string
     *                <p>The decoded URL, as a string.</p>
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // only strings containing "&", "%", "+" or a literal "\u" go through the decode pipeline
        $needs_decoding = \strpos($str, '&') !== false
                          ||
                          \strpos($str, '%') !== false
                          ||
                          \strpos($str, '+') !== false
                          ||
                          \strpos($str, '\u') !== false;

        if (!$needs_decoding) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // one pass is always executed; further passes only run for $multi_decode
        // and stop as soon as a pass leaves the string unchanged
        do {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $entity_decoded = self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            );

            $str = self::fix_simple_utf8(\rawurldecode($entity_decoded));
        } while ($multi_decode && $previous !== $str);

        return $str;
    }
4565
4566
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is always compiled with the "u" (UTF-8) modifier.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern, without delimiters.</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
     * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
     *
     * @return string
     *                <p>The result of "preg_replace()", cast to string.</p>
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // the exact option string "msr" is reduced to "ms"
        // NOTE(review): "r" is presumably an mb_ereg-only option that PCRE rejects - confirm
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback
        $boundary = $delimiter ?: '/';

        $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
4599
4600
    /**
     * Alias for "UTF8::remove_bom()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *                <p>Whatever "UTF8::remove_bom()" returns for $str.</p>
     *
     * @see UTF8::remove_bom()
     * @deprecated <p>please use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $without_bom = self::remove_bom($str);

        return $without_bom;
    }
4614
4615
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of self::$BOM (BOM string => byte length) that is found at the very
     * beginning of the string is cut off, one entry after the other.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *                <p>A string without UTF-BOM.</p>
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remaining_bytes = \strlen($str);
        foreach (self::$BOM as $marker => $marker_size) {
            // skip every marker the string does not start with
            if (\strpos($str, $marker, 0) !== 0) {
                continue;
            }

            /** @var false|string $stripped - needed for PhpStan (stubs error) */
            $stripped = \substr($str, $marker_size, $remaining_bytes);
            if ($stripped === false) {
                return '';
            }

            $remaining_bytes -= (int) $marker_size;
            $str = (string) $stripped;
        }

        return $str;
    }
4646
4647
    /**
     * Collapses directly repeated occurrences of a string inside another string into a single occurrence.
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String (or list of strings) to search for in the base string.</p>
     *
     * @return string
     *                <p>A string with removed duplicates.</p>
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        // a single needle is handled like a list with one element
        $needles = \is_string($what) === true ? [$what] : $what;

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (\is_array($needles) !== true) {
            return $str;
        }

        foreach ($needles as $needle) {
            // one or more consecutive repetitions of the (quoted) needle ...
            $pattern = '/(' . \preg_quote($needle, '/') . ')+/u';

            // ... are replaced by the needle itself
            $str = (string) \preg_replace($pattern, $needle, $str);
        }

        return $str;
    }
4673
4674
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $allowable_tags [optional] <p>Tags which should not be stripped, passed through to
     *                               "strip_tags()". Default: '' (empty string)
     *                               </p>
     *
     * @return string
     *                <p>A string without html tags.</p>
     */
    public static function remove_html(string $str, string $allowable_tags = ''): string
    {
        $stripped = \strip_tags($str, $allowable_tags);

        return $stripped;
    }
4689
4690
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Inserted in place of every removed break. Default is an empty string.</p>
     *
     * @return string
     *                <p>A string without breaks.</p>
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // "\r\n" is listed first so a Windows line break counts as ONE break (one replacement).
        // The alternation must not start with a literal "/", otherwise a slash directly in front of
        // a line break is removed as well and a bare "\r\n" is matched as two separate breaks.
        return (string) \preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
    }
4703
4704
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * Copied from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php;
     * the actual work is delegated to "ASCII::remove_invisible_characters()".
     *
     * @param string $str         <p>The input string.</p>
     * @param bool   $url_encoded [optional] <p>
     *                            Try to remove url encoded control character.
     *                            WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
     *                            <br>
     *                            Default: false
     *                            </p>
     * @param string $replacement [optional] <p>The replacement character.</p>
     *
     * @return string
     *                <p>A string without invisible chars.</p>
     */
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = false,
        string $replacement = ''
    ): string {
        $visible_only = ASCII::remove_invisible_characters($str, $url_encoded, $replacement);

        return $visible_only;
    }
4734
4735
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove. An empty prefix leaves the string untouched.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>A string without the prefix $substring.</p>
     */
    public static function remove_left(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Compare against '' explicitly: a loose truthiness check would treat the prefix "0"
        // as empty and therefore never remove it.
        if ($substring === '' || \strpos($str, $substring) !== 0) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
4770
4771
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove. An empty suffix leaves the string untouched.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>A string having a $str without the suffix $substring.</p>
     */
    public static function remove_right(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Compare against '' explicitly: a loose truthiness check would treat the suffix "0"
        // as empty and therefore never remove it.
        if ($substring === '' || \substr($str, -\strlen($substring)) !== $substring) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
4807
4808
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $search         <p>The needle to search for.</p>
     * @param string $replacement    <p>The string to replace with.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $case_sensitive = true
    ): string {
        // native "str_replace()" for the case-sensitive path, "UTF8::str_ireplace()" otherwise
        return $case_sensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4831
4832
    /**
     * Replaces all occurrences of the $search elements in $str by $replacement.
     *
     * @param string       $str            <p>The input string.</p>
     * @param array        $search         <p>The elements to search for.</p>
     * @param array|string $replacement    <p>The string (or the list of strings) to replace with.</p>
     * @param bool         $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $case_sensitive = true
    ): string {
        // native "str_replace()" for the case-sensitive path, "UTF8::str_ireplace()" otherwise
        return $case_sensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4855
4856
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * @param string $str                        <p>The input string</p>
     * @param string $replacement_char           <p>The replacement character.</p>
     * @param bool   $process_invalid_utf8_chars <p>Convert invalid UTF-8 chars </p>
     *
     * @return string
     *                <p>A string without diamond question marks (�).</p>
     */
    public static function replace_diamond_question_mark(
        string $str,
        string $replacement_char = '',
        bool $process_invalid_utf8_chars = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($process_invalid_utf8_chars === true) {
            // an empty replacement is passed to mbstring as the keyword "none"
            $substitute = $replacement_char === '' ? 'none' : $replacement_char;

            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            // temporarily switch the global substitute character and restore it afterwards
            $previous_substitute = \mb_substitute_character();
            \mb_substitute_character($substitute);
            // the polyfill maybe return false, so cast to string
            $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            \mb_substitute_character($previous_substitute);
        }

        $diamonds = [
            "\xEF\xBF\xBD",
            '�',
        ];
        $substitutes = [
            $replacement_char,
            $replacement_char,
        ];

        return \str_replace($diamonds, $substitutes, $str);
    }
4906
4907
    /**
     * Strip whitespace or other characters from the end of a UTF-8 string.
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped; null or an empty string strips whitespace.</p>
     *
     * @return string
     *                <p>A string with unwanted characters stripped from the right.</p>
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Explicit comparison instead of a truthiness check: "0" is a valid character list
        // and must not silently fall back to whitespace trimming.
        $has_chars = $chars !== null && $chars !== '';

        if (self::$SUPPORT['mbstring'] === true) {
            if ($has_chars) {
                // no delimiter is passed to preg_quote() here: mb_ereg patterns are not delimited
                /** @noinspection PregQuoteUsageInspection */
                $pattern = '[' . \preg_quote($chars) . ']+$';
            } else {
                $pattern = '[\\s]+$';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        if ($has_chars) {
            $pattern = '[' . \preg_quote($chars, '/') . ']+$';
        } else {
            $pattern = '[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
4944
4945
    /**
     * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
     *
     * Echoes every entry of self::$SUPPORT as "key - value" inside a "<pre>" block.
     *
     * @psalm-suppress MissingReturnType
     */
    public static function showSupport()
    {
        $report = '';
        foreach (self::$SUPPORT as $feature => $state) {
            $report .= $feature . ' - ' . \print_r($state, true) . "\n<br>";
        }

        echo '<pre>' . $report . '</pre>';
    }
4959
4960
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The HTML numbered entity for the given character.</p>
     */
    public static function single_chr_html_encode(
        string $char,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($char === '') {
            return '';
        }

        // ASCII input is returned as it is, but only if the caller asked for it
        $is_kept_ascii = $keep_ascii_chars === true && ASCII::is_ascii($char) === true;
        if ($is_kept_ascii) {
            return $char;
        }

        $code_point = self::ord($char, $encoding);

        return '&#' . $code_point . ';';
    }
4989
4990
    /**
     * Replaces every run of $tab_length spaces with a single tab character.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length <p>Number of spaces that make up one tab. Default: 4</p>
     *
     * @return string
     *                <p>The string with spaces converted to tabs.</p>
     */
    public static function spaces_to_tabs(string $str, int $tab_length = 4): string
    {
        $spaces = \str_repeat(' ', $tab_length);

        return \str_replace($spaces, "\t", $str);
    }
5008
5009
    /**
     * Alias for "UTF8::str_split()".
     *
     * @param string|string[] $str
     * @param int             $length
     * @param bool            $clean_utf8
     *
     * @return string[]
     *                  <p>Whatever "UTF8::str_split()" returns for the given arguments.</p>
     *
     * @see UTF8::str_split()
     * @deprecated <p>please use "UTF8::str_split()"</p>
     */
    public static function split(
        $str,
        int $length = 1,
        bool $clean_utf8 = false
    ): array {
        $parts = self::str_split($str, $length, $clean_utf8);

        return $parts;
    }
5028
5029
    /**
     * Alias for "UTF8::str_starts_with()".
     *
     * @param string $haystack
     * @param string $needle
     *
     * @return bool
     *              <p>Whatever "UTF8::str_starts_with()" returns for the given arguments.</p>
     *
     * @see UTF8::str_starts_with()
     * @deprecated <p>please use "UTF8::str_starts_with()"</p>
     */
    public static function str_begins(string $haystack, string $needle): bool
    {
        $starts_with = self::str_starts_with($haystack, $needle);

        return $starts_with;
    }
5044
5045
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // lower-case the first character of the trimmed input ...
        $str = self::lcfirst(
            \trim($str),
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        );
        // ... and drop leading dashes / underscores
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // the plain "mb_strtoupper()" is only used if no language / length handling was requested
        $use_mb_functions = $lang === null && $try_to_keep_the_string_length === false;

        /**
         * Upper-cases $text, either via "mb_strtoupper()" or via "UTF8::strtoupper()".
         *
         * @param string $text
         * @param bool   $clean <p>Forwarded as "$clean_utf8" to "UTF8::strtoupper()".</p>
         *
         * @return string
         */
        $to_upper = static function (string $text, bool $clean) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
            if ($use_mb_functions !== true) {
                return self::strtoupper($text, $encoding, $clean, $lang, $try_to_keep_the_string_length);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($text);
            }

            return \mb_strtoupper($text, $encoding);
        };

        // separators are removed and the character following them (if any) is upper-cased
        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($to_upper): string {
                return isset($match[1]) ? $to_upper($match[1], false) : '';
            },
            $str
        );

        // a run of numbers is kept, the whole match (numbers + following character) is upper-cased
        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($to_upper, $clean_utf8): string {
                return $to_upper($match[0], $clean_utf8);
            },
            $str
        );
    }
5130
5131
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * Whitespace is collapsed first; afterwards the helper is applied for the
     * delimiter " " and then for the delimiter "-".
     *
     * @param string $str
     *
     * @return string
     *                <p>A string with $str capitalized.</p>
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);
        $space_separated = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($space_separated, '-');
    }
5150
5151
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $case_sensitive to false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param string $needle         <p>Substring to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool whether or not $haystack contains $needle
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $case_sensitive = true
    ): bool {
        // byte-wise search for the case-sensitive path, multibyte search for the insensitive one
        $position = $case_sensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        return $position !== false;
    }
5173
5174
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> only if every needle was found;
     *              <strong>false</strong> for an empty $haystack, an empty $needles list
     *              or if any needle is an empty string
     *              </p>
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // explicit empty-string check, so that the needle "0" is searched for like any other needle
            $needle = (string) $needle;
            if ($needle === '') {
                return false;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            // Only a missing needle ends the loop early: returning on the first hit
            // would leave all remaining needles unchecked.
            if ($position === false) {
                return false;
            }
        }

        return true;
    }
5209
5210
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for. Empty needles are ignored.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool
     *              <p>Whether or not $haystack contains at least one of the $needles.</p>
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        // iterate by value: a by-reference loop variable would stay bound to the last element
        foreach ($needles as $needle) {
            // explicit empty-string check, so that the needle "0" is not skipped
            $needle = (string) $needle;
            if ($needle === '') {
                continue;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
5252
5253
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * This is "UTF8::str_delimit()" with "-" as the delimiter.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $dasherized = self::str_delimit($str, '-', $encoding);

        return $dasherized;
    }
5267
5268
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. The delimiter itself is inserted after lowercasing, so
     * alpha delimiters are not converted to lowercase.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $has_mbstring = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // step 1: mark every uppercase letter that is not at a word boundary with a leading dash
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $marked = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
        } else {
            $marked = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
        }

        // step 2: lowercase - the plain mb_strtolower() shortcut is only taken when
        // no language / length special-casing was requested and the charset is UTF-8
        $plain_lowercase = $lang === null
                           && $try_to_keep_the_string_length === false
                           && $encoding === 'UTF-8';
        if ($plain_lowercase) {
            $lowered = \mb_strtolower($marked);
        } else {
            $lowered = self::strtolower($marked, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        // step 3: collapse every run of dashes, underscores and whitespace into the delimiter
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $lowered);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $lowered);
    }
5319
5320
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * The checks run in this order and the first hit wins: binary input
     * (UTF-32 / UTF-16 or nothing), ASCII, UTF-8, "mb_detect_encoding()" and
     * finally an "iconv()" round-trip over the known encodings.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     */
    public static function str_detect_encoding($str)
    {
        $str = (string) $str;

        //
        // 1.) binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        if (self::is_binary($str, true) === true) {
            // result 1 is reported as little endian, result 2 as big endian
            $utf32_result = self::is_utf32($str, false);
            if ($utf32_result === 1) {
                return 'UTF-32LE';
            }
            if ($utf32_result === 2) {
                return 'UTF-32BE';
            }

            $utf16_result = self::is_utf16($str, false);
            if ($utf16_result === 1) {
                return 'UTF-16LE';
            }
            if ($utf16_result === 2) {
                return 'UTF-16BE';
            }

            // binary, but neither "UTF-16" nor "UTF-32"
            return false;
        }

        //
        // 2.) plain ASCII
        //

        if (ASCII::is_ascii($str) === true) {
            return 'ASCII';
        }

        //
        // 3.) UTF-8
        //

        if (self::is_utf8_string($str) === true) {
            return 'UTF-8';
        }

        //
        // 4.) "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding(
                $str,
                [
                    'ISO-8859-1',
                    'ISO-8859-2',
                    'ISO-8859-3',
                    'ISO-8859-4',
                    'ISO-8859-5',
                    'ISO-8859-6',
                    'ISO-8859-7',
                    'ISO-8859-8',
                    'ISO-8859-9',
                    'ISO-8859-10',
                    'ISO-8859-13',
                    'ISO-8859-14',
                    'ISO-8859-15',
                    'ISO-8859-16',
                    'WINDOWS-1251',
                    'WINDOWS-1252',
                    'WINDOWS-1254',
                    'CP932',
                    'CP936',
                    'CP950',
                    'CP866',
                    'CP850',
                    'CP51932',
                    'CP50220',
                    'CP50221',
                    'CP50222',
                    'ISO-2022-JP',
                    'ISO-2022-KR',
                    'JIS',
                    'JIS-ms',
                    'EUC-CN',
                    'EUC-JP',
                ],
                true
            );
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) "iconv()": an encoding is accepted when converting the string
        //     from that encoding to itself gives back the identical bytes
        //

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $round_trip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);
            if ($round_trip === $str) {
                return $candidate;
            }
        }

        return false;
    }
5441
5442
    /**
     * Alias for "UTF8::str_ends_with()": forwards both arguments unchanged.
     *
     * @param string $haystack
     * @param string $needle
     *
     * @return bool
     *
     * @see UTF8::str_ends_with()
     * @deprecated <p>please use "UTF8::str_ends_with()"</p>
     */
    public static function str_ends(string $haystack, string $needle): bool
    {
        $ends_with_needle = self::str_ends_with($haystack, $needle);

        return $ends_with_needle;
    }
5457
5458
    /**
     * Check if the string ends with the given substring (byte-wise, case-sensitive).
     *
     * An empty $needle always matches; an empty $haystack only matches an empty $needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        // the empty string is the ending of every string
        if ($needle === '') {
            return true;
        }

        $needle_length = \strlen($needle);

        return $haystack !== ''
               && \substr($haystack, -$needle_length) === $needle;
    }
5478
5479
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty substring always matches, the same rule "UTF8::str_ends_with()" applies
     * - an empty $substrings list never matches
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with one of the $substrings
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        foreach ($substrings as $substring) {
            $substring = (string) $substring;

            // every string ends with the empty string; without this guard
            // substr($str, -0) would return the whole string and hide the match
            if ($substring === '') {
                return true;
            }

            if (\substr($str, -\strlen($substring)) === $substring) {
                return true;
            }
        }

        return false;
    }
5503
5504
    /**
     * Ensures that the string begins with $substring. If it doesn't, it's
     * prepended. The prefix check is byte-wise and case-sensitive.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        $prefix_length = \strlen($substring);

        $already_prefixed = $prefix_length > 0
                            && \strncmp($str, $substring, $prefix_length) === 0;

        return $already_prefixed ? $str : $substring . $str;
    }
5525
5526
    /**
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
     * The suffix check is byte-wise and case-sensitive.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        $already_suffixed = $str !== ''
                            && $substring !== ''
                            && \substr($str, -\strlen($substring)) === $substring;

        if ($already_suffixed) {
            return $str;
        }

        return $str . $substring;
    }
5548
5549
    /**
     * Makes an identifier-like string readable: every "_id" is removed, every
     * remaining underscore becomes a space, the result is trimmed and then
     * passed through "UTF8::ucfirst()".
     *
     * @param string $str
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        // order matters: "_id" has to be stripped before the lone "_" is turned into a space
        $replacements = [
            '_id' => '',
            '_'   => ' ',
        ];

        $str = \str_replace(
            \array_keys($replacements),
            \array_values($replacements),
            $str
        );

        return self::ucfirst(\trim($str));
    }
5573
5574
    /**
     * Alias for "UTF8::str_istarts_with()": forwards both arguments unchanged.
     *
     * @param string $haystack
     * @param string $needle
     *
     * @return bool
     *
     * @see UTF8::str_istarts_with()
     * @deprecated <p>please use "UTF8::str_istarts_with()"</p>
     */
    public static function str_ibegins(string $haystack, string $needle): bool
    {
        $starts_with_needle = self::str_istarts_with($haystack, $needle);

        return $starts_with_needle;
    }
5589
5590
    /**
     * Alias for "UTF8::str_iends_with()": forwards both arguments unchanged.
     *
     * @param string $haystack
     * @param string $needle
     *
     * @return bool
     *
     * @see UTF8::str_iends_with()
     * @deprecated <p>please use "UTF8::str_iends_with()"</p>
     */
    public static function str_iends(string $haystack, string $needle): bool
    {
        $ends_with_needle = self::str_iends_with($haystack, $needle);

        return $ends_with_needle;
    }
5605
5606
    /**
     * Check if the string ends with the given substring, case-insensitive.
     *
     * An empty $needle always matches; an empty $haystack only matches an empty $needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // the tail is cut by the byte length of $needle and then compared via UTF8::strcasecmp()
        $haystack_tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($haystack_tail, $needle) === 0;
    }
5626
5627
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive (each candidate is checked via "UTF8::str_iends_with()")
     * - an empty $substrings list never matches
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool
     *              <p>Whether or not $str ends with one of the $substrings.</p>
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        foreach ($substrings as $candidate) {
            if (self::str_iends_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
5652
5653
    /**
     * Alias for "UTF8::stripos()": all four arguments are forwarded unchanged
     * and the result of stripos() is returned as-is.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see UTF8::stripos()
     * @deprecated <p>please use "UTF8::stripos()"</p>
     */
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::stripos($str, $needle, $offset, $encoding);
    }
5682
5683
    /**
     * Alias for "UTF8::strripos()": all four arguments are forwarded unchanged
     * and the result of strripos() is returned as-is.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see UTF8::strripos()
     * @deprecated <p>please use "UTF8::strripos()"</p>
     */
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strripos($str, $needle, $offset, $encoding);
    }
5713
5714
    /**
     * Alias for "UTF8::strpos()": all four arguments are forwarded unchanged
     * and the result of strpos() is returned as-is.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see UTF8::strpos()
     * @deprecated <p>please use "UTF8::strpos()"</p>
     */
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strpos($str, $needle, $offset, $encoding);
    }
5743
5744
    /**
     * Alias for "UTF8::strrpos()": all four arguments are forwarded unchanged
     * and the result of strrpos() is returned as-is.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see UTF8::strrpos()
     * @deprecated <p>please use "UTF8::strrpos()"</p>
     */
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strrpos($str, $needle, $offset, $encoding);
    }
5774
5775
    /**
     * Inserts $substring into the string at the $index provided.
     *
     * If $index is greater than the character length of $str, the string is
     * returned unchanged.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            // any other charset goes through the encoding-aware helpers of this class
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $length = (int) self::strlen($str, $encoding);
            if ($index > $length) {
                return $str;
            }

            $head = (string) self::substr($str, 0, $index, $encoding);
            $tail = (string) self::substr($str, $index, $length, $encoding);

            return $head . $substring . $tail;
        }

        // UTF-8: call the "mb_" functions directly
        $length = (int) \mb_strlen($str);
        if ($index > $length) {
            return $str;
        }

        $head = (string) \mb_substr($str, 0, $index);
        $tail = (string) \mb_substr($str, $index, $length);

        return $head . $substring . $tail;
    }
5814
5815
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * The search strings are turned into quoted, case-insensitive unicode
     * patterns and applied with "preg_replace()". Replacements are inserted
     * literally: "$" and "\" inside $replace are escaped first, so they are
     * never interpreted as regex back-references.
     *
     * @see http://php.net/manual/en/function.str-ireplace.php
     *
     * @param mixed $search  <p>
     *                       Every replacement with search array is
     *                       performed on the result of previous replacement.
     *                       An empty search string never matches.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement string, or an array of replacement strings.
     *                       </p>
     * @param mixed $subject <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] <p>
     *                       The number of matched and replaced needles will
     *                       be returned in count which is passed by
     *                       reference.
     *                       </p>
     *
     * @return mixed a string or an array of replacements
     */
    public static function str_ireplace($search, $replace, $subject, &$count = null)
    {
        $patterns = [];
        foreach ((array) $search as $key => $needle) {
            $needle = (string) $needle;

            // an empty needle gets a pattern that can never match, so it keeps
            // its slot in the list without ever replacing anything
            $patterns[$key] = $needle === ''
                ? '/^(?<=.)$/'
                : '/' . \preg_quote($needle, '/') . '/ui';
        }

        // preg_replace() treats "$n", "\n" and "\\" in the replacement as special;
        // escape them so the replacement behaves like the one of str_ireplace()
        $escape = static function ($replacement): string {
            return \addcslashes((string) $replacement, '\\$');
        };
        $replacement = \is_array($replace) ? \array_map($escape, $replace) : $escape($replace);

        return \preg_replace($patterns, $replacement, $subject, -1, $count);
    }
5859
5860
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * - case-insensitive, including non-ASCII letters (matching is done with "mb_stripos()")
     * - an empty $search never matches; in that case $replacement is appended
     *   to $str (behaviour kept for backward compatibility)
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            return $str . $replacement;
        }

        if ($str === '') {
            return '';
        }

        // stripos() only folds ASCII letters; mb_stripos() also matches e.g. "Ä" against "ä"
        if (\mb_stripos($str, $search) === 0) {
            // cut by characters, because the matched prefix may differ from $search in byte length
            return $replacement . (string) \mb_substr($str, (int) \mb_strlen($search));
        }

        return $str;
    }
5891
5892
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * - case-insensitive, including non-ASCII letters (matching is done with "mb_stripos()")
     * - an empty $search never matches; in that case $replacement is appended to $str
     * - a $search that is longer than $str leaves $str untouched
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string
     *                <p>string after the replacements.</p>
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            return $str . $replacement;
        }

        // a longer $search can never be the ending; bailing out here also avoids
        // handing an out-of-range negative offset to a search function
        $search_length = (int) \mb_strlen($search);
        if ($str === '' || (int) \mb_strlen($str) < $search_length) {
            return $str;
        }

        // the tail has exactly as many characters as $search, so a match at
        // position 0 means the whole tail equals $search (ignoring case)
        $tail = (string) \mb_substr($str, -$search_length);
        if (\mb_stripos($tail, $search) === 0) {
            return (string) \mb_substr($str, 0, -$search_length) . $replacement;
        }

        return $str;
    }
5924
5925
    /**
     * Check if the string starts with the given substring, case-insensitive.
     *
     * An empty $needle always matches; an empty $haystack only matches an empty $needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        // a hit at index 0 means $haystack begins with $needle
        return $haystack !== ''
               && self::stripos($haystack, $needle) === 0;
    }
5945
5946
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive (each candidate is checked via "UTF8::str_istarts_with()")
     * - an empty $str or an empty $substrings list never matches
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with one of the $substrings
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_istarts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
5974
5975
    /**
     * Gets the substring after the first occurrence of a separator
     * (the separator is searched case-insensitively).
     *
     * Returns an empty string when $str or $separator is empty, or when the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // search with the caller's charset, so the offset is measured in the
        // same unit as the substr() call that consumes it below
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6012
6013
    /**
     * Gets the substring after the last occurrence of a separator
     * (the separator is searched case-insensitively).
     *
     * Returns an empty string when $str or $separator is empty, or when the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // search with the caller's charset, so the offset is measured in the
        // same unit as the substr() call that consumes it below
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6050
6051
    /**
     * Gets the substring before the first occurrence of a separator
     * (the separator is searched case-insensitively).
     *
     * Returns an empty string when $str or $separator is empty, or when the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // search with the caller's charset, so the offset is measured in the
        // same unit as the substr() call that consumes it below
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6080
6081
    /**
     * Gets the substring before the last occurrence of a separator
     * (the separator is searched case-insensitively).
     *
     * Returns an empty string when $str or $separator is empty, or when the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // UTF-8: call the "mb_" functions directly
            $last_position = \mb_strripos($str, $separator);

            return $last_position === false
                ? ''
                : (string) \mb_substr($str, 0, $last_position);
        }

        $last_position = self::strripos($str, $separator, 0, $encoding);

        return $last_position === false
            ? ''
            : (string) self::substr($str, 0, $last_position, $encoding);
    }
6115
6116
    /**
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
     *
     * Thin wrapper around "UTF8::stristr()" that returns an empty string
     * instead of false, and also for an empty $str or $needle.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        $part = self::stristr($str, $needle, $before_needle, $encoding);

        return $part === false ? '' : $part;
    }
6152
6153
    /**
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
     *
     * Thin wrapper around "UTF8::strrichr()" that returns an empty string
     * instead of false, and also for an empty $str or $needle.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        $part = self::strrichr($str, $needle, $before_needle, $encoding);

        return $part === false ? '' : $part;
    }
6189
6190
    /**
     * Returns the last $n characters of the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end. Default: 1</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The trailing characters, or an empty string if $str is empty or $n is not positive.</p>
     */
    public static function str_last_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        if ($n <= 0 || $str === '') {
            return '';
        }

        // a negative start offset counts from the end of the string
        $start = -$n;

        if ($encoding !== 'UTF-8') {
            return (string) self::substr(
                $str,
                $start,
                null,
                self::normalize_encoding($encoding, 'UTF-8')
            );
        }

        // fast path: plain "mb_substr()" for the default encoding
        return (string) \mb_substr($str, $start);
    }
6216
6217
    /**
     * Limit the number of characters in a string.
     *
     * If the string is longer than "$length", it is cut so that the kept part plus
     * "$str_add_on" is "$length" characters long. If "$str_add_on" alone is already
     * longer than "$length", nothing of the input is kept and only "$str_add_on" is returned.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The input itself if it fits, an empty string if $str is empty or $length is not positive,
     *                otherwise the shortened string followed by $str_add_on.</p>
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Never pass a negative length to "mb_substr()": that would cut characters from the
            // END of the string instead of limiting it, and the result could exceed the input.
            $keep = (int) \max(0, $length - (int) self::strlen($str_add_on));

            return ((string) \mb_substr($str, 0, $keep)) . $str_add_on;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // same clamp as in the UTF-8 fast path above
        $keep = (int) \max(0, $length - (int) self::strlen($str_add_on));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
    }
6254
6255
    /**
     * Limit the number of characters in a string, but cut at a word boundary (a space) where possible.
     *
     * - If the character at position "$length" is a space, the string is cut right before it.
     * - Otherwise the first "$length" characters are taken and the last (partial) word is dropped.
     * - If that leaves nothing (no space inside the limit), the first "$length - 1" characters are kept.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The input itself if it fits, an empty string if $str is empty or $length is not positive,
     *                otherwise the shortened string followed by $str_add_on.</p>
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($length <= 0 || $str === '') {
            return '';
        }

        $is_utf8 = ($encoding === 'UTF-8');

        if ($is_utf8) {
            /** @noinspection UnnecessaryCastingInspection */
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // the limit ends exactly on a space -> cut right before it
            if (\mb_substr($str, $length - 1, 1) === ' ') {
                return ((string) \mb_substr($str, 0, $length - 1)) . $str_add_on;
            }

            $truncated = \mb_substr($str, 0, $length);
        } else {
            if ((int) self::strlen($str, $encoding) <= $length) {
                return $str;
            }

            // the limit ends exactly on a space -> cut right before it
            if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
                return ((string) self::substr($str, 0, $length - 1, $encoding)) . $str_add_on;
            }

            $truncated = self::substr($str, 0, $length, $encoding);
            if ($truncated === false) {
                return '' . $str_add_on;
            }
        }

        // drop the last, possibly incomplete, word
        $words = \explode(' ', $truncated);
        \array_pop($words);
        $whole_words = \implode(' ', $words);

        if ($whole_words !== '') {
            return $whole_words . $str_add_on;
        }

        // there was no space inside the limit -> fall back to a hard cut
        if ($is_utf8) {
            return ((string) \mb_substr($truncated, 0, $length - 1)) . $str_add_on;
        }

        return ((string) self::substr($truncated, 0, $length - 1, $encoding)) . $str_add_on;
    }
6321
6322
    /**
     * Returns the longest common prefix between the $str1 and $str2.
     *
     * Both strings are compared character by character from the start; the comparison
     * stops at the first difference or at the end of the shorter string.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shared prefix, or an empty string if there is none.</p>
     */
    public static function str_longest_common_prefix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        $prefix = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $limit = (int) \min(
                self::strlen($str1, $encoding),
                self::strlen($str2, $encoding)
            );

            $pos = 0;
            while ($pos < $limit) {
                $current = self::substr($str1, $pos, 1, $encoding);

                if ($current === false || $current !== self::substr($str2, $pos, 1, $encoding)) {
                    break;
                }

                $prefix .= $current;
                ++$pos;
            }

            return $prefix;
        }

        // fast path: plain "mb_*" functions for the default encoding
        $limit = (int) \min(
            \mb_strlen($str1),
            \mb_strlen($str2)
        );

        $pos = 0;
        while ($pos < $limit) {
            $current = \mb_substr($str1, $pos, 1);

            if ($current === false || $current !== \mb_substr($str2, $pos, 1)) {
                break;
            }

            $prefix .= $current;
            ++$pos;
        }

        return $prefix;
    }
6383
6384
    /**
     * Returns the longest common substring between the $str1 and $str2.
     * In the case of ties, it returns that which occurs first.
     *
     * Uses the classic dynamic-programming approach: for every pair of positions the length of
     * the common run ending there is the length of the run ending one character earlier plus one.
     *
     * @see http://en.wikipedia.org/wiki/Longest_common_substring_problem
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The longest common substring (taken from $str1), or an empty string if there is none.</p>
     */
    public static function str_longest_common_substring(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str1);
            $other_length = (int) \mb_strlen($str2);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str_length = (int) self::strlen($str1, $encoding);
            $other_length = (int) self::strlen($str2, $encoding);
        }

        // Return if either string is empty
        if ($str_length === 0 || $other_length === 0) {
            return '';
        }

        // Evaluated AFTER the normalization on purpose: an alias that normalizes to "UTF-8"
        // takes the "mb_*" path from here on.
        $is_utf8 = ($encoding === 'UTF-8');

        // Split both strings into characters once, instead of extracting the same
        // characters again and again inside the nested loop below.
        $str_chars = [];
        for ($i = 0; $i < $str_length; ++$i) {
            $str_chars[] = $is_utf8
                ? \mb_substr($str1, $i, 1)
                : self::substr($str1, $i, 1, $encoding);
        }

        $other_chars = [];
        for ($j = 0; $j < $other_length; ++$j) {
            $other_chars[] = $is_utf8
                ? \mb_substr($str2, $j, 1)
                : self::substr($str2, $j, 1, $encoding);
        }

        $len = 0; // length of the best run found so far
        $end = 0; // position (1-based, in $str1) where that run ends

        // Only the previous row of the DP table is ever read, so two rows are enough.
        $previous_row = \array_fill(0, $other_length + 1, 0);

        for ($i = 1; $i <= $str_length; ++$i) {
            $current_row = [0];
            $str_char = $str_chars[$i - 1];

            for ($j = 1; $j <= $other_length; ++$j) {
                if ($str_char === $other_chars[$j - 1]) {
                    $run = $previous_row[$j - 1] + 1;

                    // strictly greater: on ties the first occurrence wins
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $run = 0;
                }

                $current_row[$j] = $run;
            }

            $previous_row = $current_row;
        }

        if ($is_utf8) {
            return (string) \mb_substr($str1, $end - $len, $len);
        }

        return (string) self::substr($str1, $end - $len, $len, $encoding);
    }
6472
6473
    /**
     * Returns the longest common suffix between the $str1 and $str2.
     *
     * Both strings are compared character by character from the end; the comparison
     * stops at the first difference or at the start of the shorter string.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shared suffix, or an empty string if there is none.</p>
     */
    public static function str_longest_common_suffix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str2 === '' || $str1 === '') {
            return '';
        }

        $suffix = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $limit = (int) \min(
                self::strlen($str1, $encoding),
                self::strlen($str2, $encoding)
            );

            // $back counts characters from the end (1 = last character)
            $back = 1;
            while ($back <= $limit) {
                $current = self::substr($str1, -$back, 1, $encoding);

                if ($current === false || $current !== self::substr($str2, -$back, 1, $encoding)) {
                    break;
                }

                $suffix = $current . $suffix;
                ++$back;
            }

            return $suffix;
        }

        // fast path: plain "mb_*" functions for the default encoding
        $limit = (int) \min(
            \mb_strlen($str1, $encoding),
            \mb_strlen($str2, $encoding)
        );

        // $back counts characters from the end (1 = last character)
        $back = 1;
        while ($back <= $limit) {
            $current = \mb_substr($str1, -$back, 1);

            if ($current === false || $current !== \mb_substr($str2, -$back, 1)) {
                break;
            }

            $suffix = $current . $suffix;
            ++$back;
        }

        return $suffix;
    }
6537
6538
    /**
     * Returns true if $str matches the supplied pattern, false otherwise.
     *
     * The pattern is given WITHOUT delimiters and modifiers; it is wrapped into "/…/u" here.
     * A "/" inside the pattern may be written escaped ("\/") or unescaped; unescaped
     * slashes are escaped automatically so that they cannot terminate the regex early.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against.</p>
     *
     * @return bool whether or not $str matches the pattern (false as well if the pattern is invalid)
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        // Escape every "/" that is preceded by an even number of backslashes (i.e. that is not
        // escaped yet); otherwise it would be taken as the closing delimiter.
        $escaped_pattern = \preg_replace('~(?<!\\\\)((?:\\\\\\\\)*)/~', '$1\\/', $pattern);
        if ($escaped_pattern === null) {
            // the escaping itself failed (e.g. backtrack limit) -> fall back to the raw pattern
            $escaped_pattern = $pattern;
        }

        return (bool) \preg_match('/' . $escaped_pattern . '/u', $str);
    }
6550
6551
    /**
     * Returns whether or not a character exists at an index. Offsets may be
     * negative to count from the last character in the string. Implements
     * part of the ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool whether or not the index exists
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $char_count = (int) self::strlen($str, $encoding);

        // negative offsets: -1 is the last character, so "-$char_count" is still valid;
        // non-negative offsets are zero-based, so "$char_count" itself is already out of range
        return $offset < 0
            ? $char_count >= \abs($offset)
            : $char_count > $offset;
    }
6573
6574
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string
     *                <p>The character at the specified index.</p>
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Measure the string in the SAME encoding that is used for the lookup below
        // (and by "UTF8::str_offset_exists()"), otherwise the bounds check and the
        // actual access can disagree for non-default encodings.
        $length = (int) self::strlen($str, $encoding);

        if (
            ($index >= 0 && $length <= $index)
            ||
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6604
6605
    /**
     * Pad a UTF-8 string to a given length with another string.
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of "left", "right", "both"
     *
     * @return string
     *                <p>Returns the padded string; the input is returned unchanged if $pad_length is 0 or
     *                smaller than the string length, or if $pad_string is empty.</p>
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_string === '' || $pad_length === 0) {
            return $str;
        }

        // translate the string aliases into the native "STR_PAD_*" constants
        if ((int) $pad_type !== $pad_type) {
            $aliases = [
                'left'  => \STR_PAD_LEFT,
                'right' => \STR_PAD_RIGHT,
                'both'  => \STR_PAD_BOTH,
            ];

            if (!\is_string($pad_type) || !isset($aliases[$pad_type])) {
                throw new \InvalidArgumentException(
                    'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }

            $pad_type = $aliases[$pad_type];
        }

        // decided BEFORE the normalization: only a literal 'UTF-8' takes the "mb_*" fast path
        $is_utf8 = ($encoding === 'UTF-8');

        if ($is_utf8) {
            $str_length = (int) \mb_strlen($str);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $str_length = (int) self::strlen($str, $encoding);
        }

        // the string is already longer than requested -> nothing to pad
        if ($pad_length < $str_length) {
            return $str;
        }

        // number of characters that have to be added in total
        $diff = $pad_length - $str_length;

        // first $chars characters of $filler, in the active encoding
        $take = static function (string $filler, int $chars) use ($is_utf8, $encoding): string {
            if ($is_utf8) {
                return (string) \mb_substr($filler, 0, $chars);
            }

            return (string) self::substr($filler, 0, $chars, $encoding);
        };

        if ($pad_type === \STR_PAD_BOTH) {
            // an odd remainder goes to the right side
            $left_length = (int) \floor($diff / 2);
            $right_length = (int) \ceil($diff / 2);

            $pre = $take(\str_repeat($pad_string, $left_length), $left_length);
            $post = $take(\str_repeat($pad_string, $right_length), $right_length);

            return $pre . $str . $post;
        }

        $ps_length = $is_utf8
            ? (int) \mb_strlen($pad_string)
            : (int) self::strlen($pad_string, $encoding);

        // repeat the pad string often enough, then trim it to the exact size
        $fill = $take(
            \str_repeat($pad_string, (int) \ceil($diff / $ps_length)),
            $diff
        );

        if ($pad_type === \STR_PAD_LEFT) {
            return $fill . $str;
        }

        // STR_PAD_RIGHT and every unknown integer
        return $str . $fill;
    }
6768
6769
    /**
     * Returns a new string of a given length such that both sides of the
     * string are padded. Shortcut for "UTF8::str_pad()" with STR_PAD_BOTH.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The string with padding applied.</p>
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_BOTH, $encoding);
    }
6795
6796
    /**
     * Returns a new string of a given length such that the beginning of the
     * string is padded. Shortcut for "UTF8::str_pad()" with STR_PAD_LEFT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The string with left padding.</p>
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_LEFT, $encoding);
    }
6822
6823
    /**
     * Returns a new string of a given length such that the end of the string
     * is padded. Shortcut for "UTF8::str_pad()" with STR_PAD_RIGHT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The string with right padding.</p>
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_RIGHT, $encoding);
    }
6849
6850
    /**
     * Repeat a string.
     *
     * The input is passed through "UTF8::filter()" first, the result of that
     * is then handed to the native "str_repeat()".
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of time the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @return string
     *                <p>The repeated string.</p>
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
6875
6876
    /**
     * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param mixed $search  <p>
     *                       The value being searched for, otherwise known as the needle.
     *                       An array may be used to designate multiple needles.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value that replaces found search
     *                       values. An array may be used to designate multiple replacements.
     *                       </p>
     * @param mixed $subject <p>
     *                       The string or array being searched and replaced on,
     *                       otherwise known as the haystack.
     *                       </p>
     *                       <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles
     *
     * @return mixed this function returns a string or an array with the replaced values
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        // all arguments (including the by-reference counter) go straight to the native function
        /**
         * @psalm-suppress PossiblyNullArgument
         */
        return \str_replace($search, $replace, $subject, $count);
    }
6921
6922
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * Special cases: if $str starts with $search, that leading part is swapped for
     * $replacement; an empty $search results in "$str . $replacement"; in every
     * other case $str is returned unchanged.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_beginning(
        string $str,
        string $search,
        string $replacement
    ): string {
        if ($search === '') {
            // covers the empty input as well: '' . $replacement
            return $str . $replacement;
        }

        if ($str === '') {
            // a non-empty needle can never be found in an empty string
            return '';
        }

        $search_length = \strlen($search);

        // byte-wise comparison of the leading part is sufficient here
        if (\strncmp($str, $search, $search_length) !== 0) {
            return $str;
        }

        return $replacement . \substr($str, $search_length);
    }
6957
6958
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * Special cases: if $str ends with $search, that trailing part is swapped for
     * $replacement; an empty $search results in "$str . $replacement"; in every other
     * case (including a $search that is longer than $str) $str is returned unchanged.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_ending(
        string $str,
        string $search,
        string $replacement
    ): string {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        if ($search === '') {
            return $str . $replacement;
        }

        $search_length = \strlen($search);

        // Guard against a needle that is longer than the haystack: an offset before the start
        // of the string makes "strpos()" / "substr_compare()" fail (ValueError since PHP 8).
        if (
            $search_length <= \strlen($str)
            &&
            \substr_compare($str, $search, -$search_length) === 0
        ) {
            $str = \substr($str, 0, -$search_length) . $replacement;
        }

        return $str;
    }
6993
6994
    /**
     * Replace the first "$search"-term with the "$replace"-term.
     *
     * The position is looked up via "UTF8::strpos()" and the replacement itself is
     * done by "UTF8::substr_replace()".
     *
     * @param string $search  <p>The string to search for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string to search in.</p>
     *
     * @return string
     *                <p>The subject with the first match replaced, or the unchanged subject if there is no match.</p>
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(
        string $search,
        string $replace,
        string $subject
    ): string {
        $position = self::strpos($subject, $search);

        // no match -> nothing to replace
        if ($position === false) {
            return $subject;
        }

        $search_length = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $position, $search_length);
    }
7026
7027
    /**
     * Replace the last "$search"-term with the "$replace"-term.
     *
     * If "$search" does not occur in "$subject", the subject is returned unchanged.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The text that takes the place of the last match.</p>
     * @param string $subject <p>The string that is searched.</p>
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $offset = self::strrpos($subject, $search);

        // no occurrence: hand the subject back untouched
        if ($offset === false) {
            return $subject;
        }

        $search_length = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $offset, $search_length);
    }
7058
7059
    /**
     * Shuffles all the characters in the string.
     *
     * PS: uses random algorithm which is weak for cryptography purposes
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shuffled string.</p>
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        $is_utf8 = $encoding === 'UTF-8';

        // the UTF-8 default talks to mbstring directly, everything else goes
        // through the encoding-aware wrappers of this class
        if ($is_utf8) {
            $last_index = (int) \mb_strlen($str) - 1;
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $last_index = (int) self::strlen($str, $encoding) - 1;
        }

        // one entry per character position, visited in random order
        $positions = \range(0, $last_index);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($positions);

        $result = '';

        foreach ($positions as $position) {
            $char = $is_utf8
                ? \mb_substr($str, $position, 1)
                : self::substr($str, $position, 1, $encoding);

            if ($char !== false) {
                $result .= $char;
            }
        }

        return $result;
    }
7106
7107
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. If $start or $end is negative, it is computed from
     * the end of the string.
     *
     * When $end is given, both indexes are first resolved to absolute,
     * non-negative character positions; if the resolved end does not lie
     * behind the resolved start, an empty string is returned.
     *
     * @param string $str
     * @param int    $start    <p>Initial index from which to begin extraction.</p>
     * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      <p>The extracted substring; whatever the underlying substr
     *                      implementation returns for an out-of-range $start is passed through.</p>
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        $is_utf8 = $encoding === 'UTF-8';

        if ($is_utf8 === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // The character count is only needed for a missing $end or for
        // resolving negative indexes, so it is not computed otherwise.
        $str_length = 0;
        if ($end === null || $end < 0 || $start < 0) {
            $str_length = $is_utf8
                ? (int) \mb_strlen($str)
                : (int) self::strlen($str, $encoding);
        }

        if ($end === null) {
            // no end index: take everything from $start onwards
            $from = $start;
            $length = $str_length;
        } else {
            // Resolve negative indexes against the end of the string (clamped
            // at 0). Deriving the length from the raw indexes instead would
            // over-read for a negative $start and could even turn into a
            // negative length with a different meaning for substr.
            $from = $start < 0 ? \max(0, $str_length + $start) : $start;
            $to = $end < 0 ? \max(0, $str_length + $end) : $end;

            if ($to <= $from) {
                return '';
            }

            $length = $to - $from;
        }

        if ($is_utf8) {
            return \mb_substr($str, $from, $length);
        }

        return self::substr($str, $from, $length, $encoding);
    }
7156
7157
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Dashes and whitespace become "_", every matched digit / uppercase letter
     * gets its own "_" boundary, and runs of "_" are collapsed afterwards.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>A string in snake_case.</p>
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace('-', '_', self::normalize_whitespace($str));

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        /**
         * Puts a "_" boundary around the matched character.
         *
         * @param string[] $matches
         *
         * @return string
         */
        $add_boundary = static function (array $matches) use ($encoding): string {
            $char = $matches[1];

            // a character that survives the int round-trip unchanged is a
            // plain digit: it gets a "_" on both sides
            if ((string) (int) $char === $char) {
                return '_' . $char . '_';
            }

            // everything else is lower-cased and only prefixed with "_"
            $lower = $encoding === 'UTF-8'
                ? \mb_strtolower($char)
                : self::strtolower($char, $encoding);

            return '_' . $lower;
        };

        $str = (string) \preg_replace_callback('/([\\p{N}|\\p{Lu}])/u', $add_boundary, $str);

        $patterns = [
            '/\\s+/u',           // convert spaces to "_"
            '/^\\s+|\\s+$/u', // trim leading & trailing spaces
            '/_+/',                 // remove double "_"
        ];
        $replacements = ['_', '', '_'];

        $str = (string) \preg_replace($patterns, $replacements, $str);

        // trim leading & trailing "_" + whitespace
        $without_underscores = \trim($str, '_');

        return \trim($without_underscores);
    }
7222
7223
    /**
     * Sort all characters according to code points.
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @return string
     *                <p>A string of sorted characters.</p>
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $code_points = self::codepoints($str);

        if ($unique) {
            // flipping twice leaves exactly one entry per distinct code point
            $flipped = \array_flip($code_points);
            $code_points = \array_flip($flipped);
        }

        if ($desc === false) {
            \asort($code_points);
        } else {
            \arsort($code_points);
        }

        return self::string($code_points);
    }
7249
7250
    /**
     * Convert a string to an array of Unicode characters.
     *
     * An array input is processed element by element (recursively) and the
     * input keys are kept. A non-positive $length or an empty string yields
     * an empty array.
     *
     * @param int|int[]|string|string[] $str                     <p>The string to split into array.</p>
     * @param int                       $length                  [optional] <p>Max character length of each array
     *                                                           element.</p>
     * @param bool                      $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool                      $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                                           "mb_substr"</p>
     *
     * @return array
     *               <p>An array containing chunks of the input.</p>
     */
    public static function str_split(
        $str,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        if (\is_array($str) === true) {
            // Split every element on its own. Writing back by key (instead of
            // iterating by reference) leaves no dangling reference behind.
            foreach ($str as $key => $value) {
                $str[$key] = self::str_split(
                    $value,
                    $length,
                    $clean_utf8,
                    $try_to_use_mb_functions
                );
            }

            return $str;
        }

        // init
        $str = (string) $str;

        if ($str === '') {
            return [];
        }

        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        if (
            $try_to_use_mb_functions === true
            &&
            self::$SUPPORT['mbstring'] === true
        ) {
            // prefer the native implementation, it already honours $length
            if (Bootup::is_php('7.4')) {
                $return = \mb_str_split($str, $length);
                if ($return !== false) {
                    return $return;
                }
            }

            $i_max = \mb_strlen($str);
            if ($i_max <= 127) {
                // short input: pick the characters one by one
                $ret = [];
                for ($i = 0; $i < $i_max; ++$i) {
                    $ret[] = \mb_substr($str, $i, 1);
                }
            } else {
                // longer input: let PCRE split the string in a single pass
                $return_array = [];
                \preg_match_all('/./us', $str, $return_array);
                $ret = $return_array[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $return_array = [];
            \preg_match_all('/./us', $str, $return_array);
            $ret = $return_array[0] ?? [];
        } else {

            // fallback: decode the UTF-8 byte sequences by hand; a lead byte
            // without the expected continuation bytes is skipped

            $ret = [];
            $len = \strlen($str);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($str[$i] & "\x80") === "\x00") {
                    // 0xxxxxxx: single byte (ASCII)
                    $ret[] = $str[$i];
                } elseif (
                    isset($str[$i + 1])
                    &&
                    ($str[$i] & "\xE0") === "\xC0"
                ) {
                    // 110xxxxx: lead byte of a two byte sequence
                    if (($str[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $str[$i] . $str[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($str[$i + 2])
                    &&
                    ($str[$i] & "\xF0") === "\xE0"
                ) {
                    // 1110xxxx: lead byte of a three byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($str[$i + 3])
                    &&
                    ($str[$i] & "\xF8") === "\xF0"
                ) {
                    // 11110xxx: lead byte of a four byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            // glue the single characters back together in groups of $length
            $ret = \array_chunk($ret, $length);

            return \array_map(
                // The chunk is taken by value: array_map() hands its values
                // over by value, so a by-reference parameter is never
                // satisfied and triggers a warning on PHP 8.
                static function (array $item): string {
                    return \implode('', $item);
                },
                $ret
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
7396
7397
    /**
     * Splits the string with the provided regular expression, returning an
     * array of strings. An optional integer $limit will truncate the
     * results.
     *
     * A $limit of 0 yields an empty array, an empty $pattern yields the
     * unsplit string as the only element. If the split itself fails, an
     * empty array is returned.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @return string[]
     *                  <p>An array of strings.</p>
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $parts = \mb_split($pattern, $str);

            // mb_split() signals failure with false; without this guard the
            // declared array return type would be violated
            if ($parts === false) {
                return [];
            }

            // a positive limit keeps only the first $limit pieces
            if ($limit > 0) {
                return \array_slice($parts, 0, $limit);
            }

            return $parts;
        }

        // preg_split() puts the unsplit remainder into the last element when a
        // limit is given, so one extra element is requested and dropped below
        if ($limit > 0) {
            ++$limit;
        } else {
            $limit = -1;
        }

        // NOTE(review): unlike the mb_split() branch above, this fallback
        // quotes $pattern and therefore matches it literally - confirm whether
        // that difference is intended.
        $array = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);

        if ($array === false) {
            return [];
        }

        if ($limit > 0 && \count($array) === $limit) {
            \array_pop($array);
        }

        return $array;
    }
7459
7460
    /**
     * Check if the string starts with the given substring.
     *
     * The comparison is case-sensitive and byte-wise. An empty $needle always
     * matches, a non-empty $needle never matches an empty $haystack.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // Compare only the leading strlen($needle) bytes; a strpos() lookup
        // would search the whole haystack before giving up on a non-prefix.
        return \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
7480
7481
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty $str or an empty list of $substrings always yields false
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with $substring
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        // nothing to compare
        if ($str === '' || $substrings === []) {
            return false;
        }

        // the first matching prefix wins
        foreach ($substrings as $candidate) {
            if (self::str_starts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
7509
7510
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // Use the encoding-aware wrapper here, like the sibling
        // str_substr_*_separator() methods do, instead of handing the
        // caller-supplied encoding name straight to mb_substr().
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
7549
7550
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        // any other encoding goes through the encoding-aware wrappers
        if ($encoding !== 'UTF-8') {
            $position = self::strrpos($str, $separator, 0, $encoding);
            if ($position === false) {
                return '';
            }

            $tail_start = $position + (int) self::strlen($separator, $encoding);

            return (string) self::substr($str, $tail_start, null, $encoding);
        }

        // UTF-8 default: talk to mbstring directly
        $position = \mb_strrpos($str, $separator);
        if ($position === false) {
            return '';
        }

        $tail_start = $position + (int) \mb_strlen($separator);

        return (string) \mb_substr($str, $tail_start);
    }
7589
7590
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        // any other encoding goes through the encoding-aware wrappers
        if ($encoding !== 'UTF-8') {
            $position = self::strpos($str, $separator, 0, $encoding);
            if ($position === false) {
                return '';
            }

            return (string) self::substr($str, 0, $position, $encoding);
        }

        // UTF-8 default: talk to mbstring directly
        $position = \mb_strpos($str, $separator);
        if ($position === false) {
            return '';
        }

        return (string) \mb_substr($str, 0, $position);
    }
7633
7634
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        // any other encoding goes through the encoding-aware wrappers
        if ($encoding !== 'UTF-8') {
            $position = self::strrpos($str, $separator, 0, $encoding);
            if ($position === false) {
                return '';
            }

            // the encoding name is normalized only for the final extraction
            $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, 0, $position, $normalized_encoding);
        }

        // UTF-8 default: talk to mbstring directly
        $position = \mb_strrpos($str, $separator);
        if ($position === false) {
            return '';
        }

        return (string) \mb_substr($str, 0, $position);
    }
7676
7677
    /**
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
     *
     * Without "$before_needle" the result starts with the needle itself. An
     * empty string is returned if $str or $needle is empty, or if the needle
     * is not found.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // UTF-8 default: talk to mbstring directly, otherwise use the
        // encoding-aware wrapper
        $part = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $before_needle)
            : self::strstr($str, $needle, $before_needle, $encoding);

        if ($part === false) {
            return '';
        }

        return $part;
    }
7721
7722
    /**
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
     *
     * Without "$before_needle" the result starts with the needle itself. An
     * empty string is returned if $str or $needle is empty, or if the needle
     * is not found.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // UTF-8 default: talk to mbstring directly, otherwise use the
        // encoding-aware wrapper
        $part = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $before_needle)
            : self::strrchr($str, $needle, $before_needle, $encoding);

        if ($part === false) {
            return '';
        }

        return $part;
    }
7766
7767
    /**
     * Surrounds $str with the given substring.
     *
     * @param string $str       <p>The string to wrap.</p>
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @return string
     *                <p>A string with the substring both prepended and appended.</p>
     */
    public static function str_surround(string $str, string $substring): string
    {
        // prefix, payload, suffix - joined without any glue
        $pieces = [$substring, $str, $substring];

        return \implode('', $pieces);
    }
7780
7781
    /**
     * Returns a trimmed string with the first letter of each word capitalized.
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * A "word" is a run of characters that are neither whitespace nor listed
     * in $word_define_chars. Words found in $ignore are left exactly as they are.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                        [optional] <p>An array of words not to capitalize or null.
     *                                                           Default: null</p>
     * @param string              $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null         $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                           tr</p>
     * @param bool                $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                           ß</p>
     * @param bool                $use_trim_first                [optional] <p>true === trim the input string, first</p>
     * @param string|null         $word_define_chars             [optional] <p>A string of chars that will be used as whitespace separator === words.</p>
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false,
        bool $use_trim_first = true,
        string $word_define_chars = null
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($use_trim_first === true) {
            $str = \trim($str);
        }

        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        // the plain mb_* functions are only used when no language specific
        // handling and no length preservation is requested
        $use_mb_functions = $lang === null && $try_to_keep_the_string_length === false;

        // Explicit null / empty check: a plain truthiness test would silently
        // drop the valid separator string "0".
        if ($word_define_chars !== null && $word_define_chars !== '') {
            $word_define_chars = \preg_quote($word_define_chars, '/');
        } else {
            $word_define_chars = '';
        }

        $str = (string) \preg_replace_callback(
            '/([^\\s' . $word_define_chars . ']+)/u',
            static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
                // ignored words are passed through untouched
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                    return $match[0];
                }

                if ($use_mb_functions === true) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                               . \mb_strtolower(\mb_substr($match[0], 1));
                    }

                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
                }

                // language / length aware path: lower-case the whole word,
                // then upper-case its first character
                return self::ucfirst(
                    self::strtolower(
                        $match[0],
                        $encoding,
                        false,
                        $lang,
                        $try_to_keep_the_string_length
                    ),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            },
            $str
        );

        return $str;
    }
7871
7872
    /**
     * Returns a trimmed string in title case that is meant to be read by humans.
     *
     * Adapted from John Gruber's title-case script. The string is trimmed and,
     * if self::has_lowercase() reports no lowercase character, lower-cased first.
     * Then every word is handled by the first rule that matches:
     * - URLs, domains, e-mail addresses and file paths are kept as they are,
     * - "small words" (a, an, and, ... plus everything from $ignore) are lower-cased,
     * - words without internal capitals get an upper-case first character,
     * - every other word (e.g. "iPhone") is kept as it is.
     * Afterwards small words are capitalized again at the start / end of the
     * title, after sub-sentence punctuation and inside hyphenated compounds.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string   $str
     * @param string[] $ignore   <p>An array of words not to capitalize. The entries are appended to the
     *                           small-word list and inserted into the regular expressions as they are
     *                           (they are not escaped).</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize_for_humans(
        string $str,
        array $ignore = [],
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        $str = \trim($str);

        // an input without any lowercase char (e.g. all caps) is normalized to lowercase first
        if (self::has_lowercase($str) === false) {
            $str = self::strtolower($str, $encoding);
        }

        // regex alternation of all small words, the caller's words come last
        $small_words_rx = \implode(
            '|',
            \array_merge(
                [
                    '(?<!q&)a',
                    'an',
                    'and',
                    'as',
                    'at(?!&t)',
                    'but',
                    'by',
                    'en',
                    'for',
                    'if',
                    'in',
                    'of',
                    'on',
                    'or',
                    'the',
                    'to',
                    'v[.]?',
                    'via',
                    'vs[.]?',
                ],
                $ignore
            )
        );

        // optional apostrophe suffix of a word, e.g. "'s"
        $apostrophe_rx = '(?x: [\'’] [[:lower:]]* )?';

        /**
         * Upper-case the first char of capture group 1.
         *
         * @param string[] $matches
         *
         * @return string
         */
        $ucfirst_group_one = static function (array $matches) use ($encoding): string {
            return static::ucfirst($matches[1], $encoding);
        };

        /**
         * Keep capture group 1 and upper-case the first char of capture group 2.
         *
         * @param string[] $matches
         *
         * @return string
         */
        $ucfirst_group_two = static function (array $matches) use ($encoding): string {
            return $matches[1] . static::ucfirst($matches[2], $encoding);
        };

        // the main substitutions
        $str = (string) \preg_replace_callback(
            '~\\b (_*) (?:                                                         # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_rx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $small_words_rx . ' ) ' . $apostrophe_rx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                if ($matches[2]) {
                    // preserve URLs, domains, emails and file paths
                    $word = $matches[2];
                } elseif ($matches[3]) {
                    // lower-case small words
                    $word = self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // capitalize word w/o internal caps
                    $word = static::ucfirst($matches[4], $encoding);
                } else {
                    // preserve other kinds of word (iPhone)
                    $word = $matches[5];
                }

                // groups 1 and 6 are the leading / trailing underscores, both are preserved
                return $matches[1] . $word . $matches[6];
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $small_words_rx . ' ) \\b # ...followed by small word
                     ~uxi',
            $ucfirst_group_two,
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\\b ( ' . $small_words_rx . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
            $ucfirst_group_one,
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $small_words_rx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            $ucfirst_group_one,
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        // NOTE(review): the lookbehind below contains "…" although its inline comment talks about a hyphen - confirm.
        $str = (string) \preg_replace_callback(
            '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $small_words_rx . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
            $ucfirst_group_two,
            $str
        );

        return $str;
    }
8050
8051
    /**
     * Get a binary (base 2) representation of a specific string.
     *
     * The bytes of the string are read as one big number and written with the
     * digits "0" and "1", without leading zeros. An empty string (or a string
     * that only consists of NUL bytes) results in "0".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      <p>false on error</p>
     */
    public static function str_to_binary(string $str)
    {
        /** @var array|false $value - needed for PhpStan (stubs error) */
        $value = \unpack('H*', $str);
        if ($value === false) {
            return false;
        }

        // Expand every (lowercase) hex digit into its four bits. This is exact for any
        // input length, unlike base_convert() which works on a float internally and
        // silently loses precision as soon as the string is longer than a few bytes.
        /** @noinspection OffsetOperationsInspection */
        $bits = \strtr(
            (string) $value[1],
            [
                '0' => '0000',
                '1' => '0001',
                '2' => '0010',
                '3' => '0011',
                '4' => '0100',
                '5' => '0101',
                '6' => '0110',
                '7' => '0111',
                '8' => '1000',
                '9' => '1001',
                'a' => '1010',
                'b' => '1011',
                'c' => '1100',
                'd' => '1101',
                'e' => '1110',
                'f' => '1111',
            ]
        );

        // same output format as base_convert(): no leading zeros, but at least one digit
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
8070
8071
    /**
     * Split a string into its lines.
     *
     * One or two consecutive "\r" / "\n" chars are treated as ONE line break, so
     * "\r\n" is a single break, and so is "\n\n".
     *
     * @param string   $str
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
    {
        // result for "nothing to split" and for a failed split
        $fallback = $remove_empty_values === true ? [] : [''];

        if ($str === '') {
            return $fallback;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $lines = self::$SUPPORT['mbstring'] === true
            ? \mb_split("[\r\n]{1,2}", $str)
            : \preg_split("/[\r\n]{1,2}/u", $str);

        if ($lines === false) {
            return $fallback;
        }

        $filter_requested = $remove_short_values !== null || $remove_empty_values !== false;
        if (!$filter_requested) {
            return $lines;
        }

        return self::reduce_string_array($lines, $remove_empty_values, $remove_short_values);
    }
8109
8110
    /**
     * Convert a string into an array of words.
     *
     * The string is split with PREG_SPLIT_DELIM_CAPTURE, so (without filtering) the
     * result contains the words as well as the text between them.
     *
     * @param string   $str
     * @param string   $char_list           <p>Additional chars for the definition of "words".</p>
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $char_list = '',
        bool $remove_empty_values = false,
        int $remove_short_values = null
    ): array {
        if ($str === '') {
            return $remove_empty_values === true ? [] : [''];
        }

        // regex character class for "letters + the additional chars"
        $word_class = self::rxClass($char_list, '\pL');

        $parts = \preg_split(
            "/({$word_class}+(?:[\p{Pd}’']{$word_class}+)*)/u",
            $str,
            -1,
            \PREG_SPLIT_DELIM_CAPTURE
        );
        if ($parts === false) {
            return $remove_empty_values === true ? [] : [''];
        }

        $filter_requested = $remove_short_values !== null || $remove_empty_values !== false;
        if (!$filter_requested) {
            return $parts;
        }

        $filtered = self::reduce_string_array(
            $parts,
            $remove_empty_values,
            $remove_short_values
        );

        // make sure that every remaining value is a string, the keys are kept
        return \array_map(
            static function ($item): string {
                return (string) $item;
            },
            $filtered
        );
    }
8157
8158
    /**
     * alias for "UTF8::to_ascii()"
     *
     * All arguments are handed over to "UTF8::to_ascii()" unchanged.
     *
     * @param string $str
     * @param string $unknown
     * @param bool   $strict
     *
     * @return string
     *
     * @see UTF8::to_ascii()
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
     */
    public static function str_transliterate(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
8177
8178
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If the substring alone is already as long as (or longer than) $length,
     * nothing of the original string is kept and only the substring is returned
     * (same behavior as "UTF8::str_truncate_safe()").
     *
     * A string that is not longer than $length is returned unchanged.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                // Reserve room for the substring, but never go below zero: a negative
                // length would make mb_substr() cut from the END of the string and the
                // result would be much longer than requested.
                $length = \max(0, $length - (int) \mb_strlen($substring));

                /** @noinspection UnnecessaryCastingInspection */
                return (string) \mb_substr($str, 0, $length) . $substring;
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            // see above: keep the remaining length >= 0
            $length = \max(0, $length - (int) self::strlen($substring, $encoding));
        }

        return (
               (string) self::substr(
                   $str,
                   0,
                   $length,
                   $encoding
               )
               ) . $substring;
    }
8236
8237
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * For an empty string, for a $length <= 0 and when no room is left next to
     * the substring, only $substring is returned.
     *
     * @param string $str
     * @param int    $length                                 <p>Desired length of the truncated string.</p>
     * @param string $substring                              [optional] <p>The substring to append if it can fit. Default:
     *                                                       ''</p>
     * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>Default: false</p>
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignore_do_not_split_words_for_one_word = false
    ): string {
        if ($str === '' || $length <= 0) {
            return $substring;
        }

        if ($encoding === 'UTF-8') {
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            // reserve room for the substring
            $length -= (int) \mb_strlen($substring);
            if ($length <= 0) {
                return $substring;
            }

            /** @var false|string $head - needed for PhpStan (stubs error) */
            $head = \mb_substr($str, 0, $length);
            if ($head === false) {
                return '';
            }

            // first space at or behind the last kept char
            $next_space = \mb_strpos($str, ' ', $length - 1);

            // the cut is not directly in front of a space => the last word was truncated
            if ($next_space !== $length) {
                // cut back to the last space inside of the kept part
                $last_space = \mb_strrpos($head, ' ', 0);

                $drop_single_word = $next_space !== false && $ignore_do_not_split_words_for_one_word === false;
                if ($last_space !== false || $drop_single_word) {
                    $head = (string) \mb_substr($head, 0, (int) $last_space);
                }
            }

            return $head . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        // reserve room for the substring
        $length -= (int) self::strlen($substring, $encoding);
        if ($length <= 0) {
            return $substring;
        }

        $head = self::substr($str, 0, $length, $encoding);
        if ($head === false) {
            return '';
        }

        // first space at or behind the last kept char
        $next_space = self::strpos($str, ' ', $length - 1, $encoding);

        // the cut is not directly in front of a space => the last word was truncated
        if ($next_space !== $length) {
            // cut back to the last space inside of the kept part
            $last_space = self::strrpos($head, ' ', 0, $encoding);

            $drop_single_word = $next_space !== false && $ignore_do_not_split_words_for_one_word === false;
            if ($last_space !== false || $drop_single_word) {
                $head = (string) self::substr($head, 0, (int) $last_space, $encoding);
            }
        }

        return $head . $substring;
    }
8332
8333
    /**
     * Returns the string in its underscored form.
     *
     * This is "UTF8::str_delimit()" with "_" as the delimiter.
     *
     * @param string $str
     *
     * @return string
     *                <p>The underscored string.</p>
     */
    public static function str_underscored(string $str): string
    {
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
8348
8349
    /**
     * Returns an UpperCamelCase version of the supplied string.
     *
     * The string is camelized via "UTF8::str_camelize()" (called with $str and
     * $encoding only) and the result is passed through "UTF8::ucfirst()" together
     * with the remaining options.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>A string in UpperCamelCase.</p>
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst(
            $camelized,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }
8372
8373
    /**
     * alias for "UTF8::ucfirst()"
     *
     * All arguments are handed over to "UTF8::ucfirst()" unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @return string
     *
     * @see UTF8::ucfirst()
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
     */
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        return self::ucfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
    }
8402
8403
    /**
     * Get the number of words in a specific string.
     *
     * The string is split via "UTF8::str_to_words()"; the words are read from the
     * odd indexes of that result (index 1, 3, 5, ...).
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $format    [optional] <p>
     *                          <strong>0</strong> => return a number of words (default)<br>
     *                          <strong>1</strong> => return an array of words<br>
     *                          <strong>2</strong> => return an array of words with word-offset as key
     *                          </p>
     * @param string $char_list [optional] <p>Additional chars that contains to words and do not start a new word.</p>
     *
     * @return int|string[] The number of words in the string
     */
    public static function str_word_count(string $str, int $format = 0, string $char_list = '')
    {
        $parts = self::str_to_words($str, $char_list);
        $part_count = \count($parts);

        // every other format value behaves like the default: only count the words
        if ($format !== 1 && $format !== 2) {
            return (int) (($part_count - 1) / 2);
        }

        $words = [];

        if ($format === 1) {
            for ($index = 1; $index < $part_count; $index += 2) {
                $words[] = $parts[$index];
            }

            return $words;
        }

        // format 2: the key is the offset of the word, it starts behind the first part
        $offset = (int) self::strlen($parts[0]);
        for ($index = 1; $index < $part_count; $index += 2) {
            $word = $parts[$index];
            $words[$offset] = $word;

            // skip the word itself and the part that follows it
            $offset += (int) self::strlen($word) + (int) self::strlen($parts[$index + 1]);
        }

        return $words;
    }
8440
8441
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp()
     *
     * Both strings are passed through "UTF8::strtocasefold()" with the same
     * options and the results are compared with "UTF8::strcmp()".
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): int {
        // one place for the case-folding options, so that both sides are treated the same
        $case_fold = static function (string $value) use ($encoding) {
            return self::strtocasefold($value, true, false, $encoding, null, false);
        };

        $folded1 = $case_fold($str1);
        $folded2 = $case_fold($str2);

        return self::strcmp($folded1, $folded2);
    }
8479
8480
    /**
     * alias for "UTF8::strstr()"
     *
     * All arguments are handed over to "UTF8::strstr()" unchanged.
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $clean_utf8
     *
     * @return false|string
     *
     * @see UTF8::strstr()
     * @deprecated <p>please use "UTF8::strstr()"</p>
     */
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        return self::strstr($haystack, $needle, $before_needle, $encoding, $clean_utf8);
    }
8509
8510
    /**
     * Case-sensitive string comparison.
     *
     * Identical strings are equal right away. Otherwise both strings are
     * normalized (Unicode NFD) and compared byte-wise, so that different
     * representations of the same char (e.g. "é" as one code point or as "e" +
     * combining accent) are treated as equal. A string that can not be
     * normalized (e.g. invalid UTF-8) is compared as it is.
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // Normalizer::normalize() returns false on error; fall back to the raw input
        // instead of handing "false" over to \strcmp().
        return \strcmp(
            $normalized1 === false ? $str1 : $normalized1,
            $normalized2 === false ? $str2 : $normalized2
        );
    }
8532
8533
    /**
     * Find length of initial segment not matching mask.
     *
     * Returns the length of the part of the string (optionally reduced to
     * $offset / $length first) in front of the first char from $char_list. If no
     * such char is found, or if $char_list is empty, the length of the whole
     * (reduced) string is returned.
     *
     * @param string $str
     * @param string $char_list
     * @param int    $offset
     * @param int    $length
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     */
    public static function strcspn(
        string $str,
        string $char_list,
        int $offset = null,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // without a mask nothing can match => the whole string is the segment
        if ($char_list === '') {
            return (int) self::strlen($str, $encoding);
        }

        $has_range = $offset !== null || $length !== null;
        if ($has_range) {
            $start = (int) $offset;

            if ($encoding !== 'UTF-8') {
                /** @noinspection UnnecessaryCastingInspection */
                $part = self::substr($str, $start, $length, $encoding);
            } else {
                /** @noinspection UnnecessaryCastingInspection */
                $part = $length === null
                    ? \mb_substr($str, $start)
                    : \mb_substr($str, $start, $length);
            }

            if ($part === false) {
                return 0;
            }

            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $str = $part;
        }

        if ($str === '') {
            return 0;
        }

        // group 1 lazily captures everything in front of the first char from the mask
        $found = [];
        if (!\preg_match('/^(.*?)' . self::rxClass($char_list) . '/us', $str, $found)) {
            return (int) self::strlen($str, $encoding);
        }

        $segment_length = self::strlen($found[1], $encoding);

        return $segment_length === false ? 0 : $segment_length;
    }
8597
8598
    /**
     * alias for "UTF8::stristr()"
     *
     * All arguments are handed over to "UTF8::stristr()" unchanged.
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $clean_utf8
     *
     * @return false|string
     *
     * @see UTF8::stristr()
     * @deprecated <p>please use "UTF8::stristr()"</p>
     */
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        return self::stristr($haystack, $needle, $before_needle, $encoding, $clean_utf8);
    }
8627
8628
    /**
     * Create a UTF-8 string from code points.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * Every value is cast to int, written as a decimal numeric HTML entity
     * ("&#<int>;") and the joined entities are decoded via
     * "UTF8::html_entity_decode()".
     *
     * @param array $array <p>Integer or Hexadecimal codepoints.</p>
     *                     NOTE(review): because of the int-cast a hex string like "0x41" ends up as 0 - confirm
     *                     which hexadecimal notation is expected here.
     *
     * @return string
     *                <p>A UTF-8 encoded string.</p>
     */
    public static function string(array $array): string
    {
        if ($array === []) {
            return  '';
        }

        $entities = \array_map(
            static function ($code_point): string {
                return '&#' . (int) $code_point . ';';
            },
            $array
        );

        return self::html_entity_decode(\implode('', $entities), \ENT_QUOTES | \ENT_HTML5);
    }
8651
8652
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * The known byte order marks are the keys of self::$BOM.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     */
    public static function string_has_bom(string $str): bool
    {
        // only the keys (the BOM byte sequences) are needed, not the byte lengths
        $bom_strings = \array_keys(self::$BOM);

        foreach ($bom_strings as $bom_string) {
            // position 0 => the string starts with this BOM
            if (\strpos($str, $bom_string) === 0) {
                return true;
            }
        }

        return false;
    }
8672
8673
    /**
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
     *
     * Thin wrapper around the native "strip_tags()"; the string is passed through
     * "UTF8::clean()" first if $clean_utf8 is true.
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string $str            <p>
     *                               The input string.
     *                               </p>
     * @param string $allowable_tags [optional] <p>
     *                               You can use the optional second parameter to specify tags which should
     *                               not be stripped.
     *                               </p>
     *                               <p>
     *                               HTML comments and PHP tags are also stripped. This is hardcoded and
     *                               can not be changed with allowable_tags.
     *                               </p>
     * @param bool   $clean_utf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     *                <p>The stripped string.</p>
     */
    public static function strip_tags(
        string $str,
        string $allowable_tags = null,
        bool $clean_utf8 = false
    ): string {
        if ($str === '') {
            return '';
        }

        $input = $clean_utf8 === true ? self::clean($str) : $str;

        // the second argument is only handed over if the caller provided one
        return $allowable_tags === null
            ? \strip_tags($input)
            : \strip_tags($input, $allowable_tags);
    }
8713
8714
    /**
     * Remove every whitespace character from a string.
     *
     * Matching is done with the "[[:space:]]" class in unicode ("u") mode, so tabs
     * and newlines are removed; multibyte whitespace such as the thin space and
     * the ideographic space is intended to be covered as well.
     *
     * @param string $str
     *
     * @return string
     */
    public static function strip_whitespace(string $str): string
    {
        return $str === ''
            ? ''
            : (string) \preg_replace('/[[:space:]]+/u', '', $str);
    }
8731
8732
    /**
     * Case-insensitive search for the first occurrence of a substring.
     *
     * Backends are tried in this order: mbstring, intl (UTF-8 and non-negative
     * offset only), native stripos() for pure ASCII input, and finally a
     * case-folding fallback that delegates to UTF8::strpos().
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle     <p>The string to find in haystack.</p>
     * @param int    $offset     [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found (also returned
     *                   when haystack or needle is empty)
     */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        $has_mbstring = self::$SUPPORT['mbstring'] === true;

        // fast path: no encoding normalization needed
        if ($has_mbstring && $encoding === 'UTF-8') {
            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($has_mbstring) {
            return \mb_stripos($haystack, $needle, $offset, $encoding);
        }

        // "grapheme_stripos()" can't handle other encodings or a negative offset
        $can_use_intl = self::$SUPPORT['intl'] === true
                        && $offset >= 0
                        && $encoding === 'UTF-8';
        if ($can_use_intl) {
            $intl_result = \grapheme_stripos($haystack, $needle, $offset);
            if ($intl_result !== false) {
                return $intl_result;
            }
        }

        // fallback for ascii only
        if (ASCII::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        // fallback via vanilla php: case-fold both strings, then search case-sensitively
        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($folded_haystack, $folded_needle, $offset, $encoding);
    }
8807
8808
    /**
     * Returns all of haystack starting from and including the first occurrence of needle to the end
     * (case-insensitive).
     *
     * Backends are tried in this order: mbstring, intl (UTF-8 only), native
     * stristr() for pure ASCII input, and finally a regex based fallback.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found
     *                      (also returned when haystack or needle is empty).<br>
     *                      If cleaning leaves the needle empty, the (cleaned) haystack is returned.</p>
     */
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // Cleaning may have emptied the needle. Compare against '' explicitly:
        // a loose "!$needle" check would also treat the valid needle "0" as empty
        // and wrongly return the whole haystack.
        if ($needle === '') {
            return $haystack;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        // fallback for ascii only
        if (ASCII::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        // fallback via vanilla php: lazily capture everything in front of the first
        // (case-insensitive) occurrence of the needle
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // skip as many characters as were captured in front of the needle
        return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
    }
8892
8893
    /**
     * Count the characters of a string (not its bytes).
     *
     * Backends are tried in this order: mbstring, native strlen() for the
     * "CP850" / "ASCII" encodings, iconv, intl (UTF-8 only), native strlen() for
     * pure ASCII input and finally a regex based character count.
     *
     * @see http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str        <p>The string being checked for length.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   <p>
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
     *                   $encoding.
     *                   (One multi-byte character counted as +1).
     *                   <br>
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
     *                   chars.
     *                   </p>
     */
    public static function strlen(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($str === '') {
            return 0;
        }

        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        // preferred backend: mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strlen($str)
                : \mb_strlen($str, $encoding);
        }

        // binary || ascii only: one byte per character
        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strlen($str);
        }

        $no_converter_available = self::$SUPPORT['mbstring'] === false
                                  && self::$SUPPORT['iconv'] === false;
        if ($encoding !== 'UTF-8' && $no_converter_available) {
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // fallback via iconv
        if (self::$SUPPORT['iconv'] === true) {
            $iconv_length = \iconv_strlen($str, $encoding);
            if ($iconv_length !== false) {
                return $iconv_length;
            }
        }

        // fallback via intl ("grapheme_strlen()" can't handle other encodings)
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $grapheme_length = \grapheme_strlen($str);
            if ($grapheme_length !== null) {
                return $grapheme_length;
            }
        }

        // fallback for ascii only
        if (ASCII::is_ascii($str)) {
            return \strlen($str);
        }

        // fallback via vanilla php: count the characters matched in unicode mode
        \preg_match_all('/./us', $str, $chars);
        $char_count = \count($chars[0]);

        // no match on a non-empty string is reported as failure
        return $char_count === 0 ? false : $char_count;
    }
9012
9013
    /**
     * Get the length of a string in bytes.
     *
     * When the "mbstring_func_overload" support flag is set, mb_strlen() with the
     * 8-bit "CP850" encoding is used instead of the native strlen().
     *
     * @param string $str
     *
     * @return int
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // "mb_" is available if overload is used, so use it ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850') // 8-BIT
            : \strlen($str);
    }
9033
9034
    /**
     * Case-insensitive "natural order" comparison of two strings.
     *
     * INFO: natural order version of UTF8::strcasecmp()
     *
     * Both strings are case-folded via UTF8::strtocasefold() and then handed to
     * UTF8::strnatcmp().
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded_str1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_str2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded_str1, $folded_str2);
    }
9055
9056
    /**
     * "Natural order" comparison of two strings.
     *
     * INFO: natural order version of UTF8::strcmp()
     *
     * Identical strings short-circuit to 0; otherwise both strings are passed
     * through UTF8::strtonatfold() before the native comparison.
     *
     * @see http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        return $str1 === $str2
            ? 0
            : \strnatcmp(
                (string) self::strtonatfold($str1),
                (string) self::strtonatfold($str2)
            );
    }
9082
9083
    /**
     * Case-insensitive comparison of the first n characters of two strings.
     *
     * Both strings are case-folded via UTF8::strtocasefold() and then handed to
     * UTF8::strncmp().
     *
     * @see http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        $folded_str1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_str2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strncmp($folded_str1, $folded_str2, $len);
    }
9110
9111
    /**
     * Compare the first n characters of two strings.
     *
     * Both strings are cut to $len characters (mb_substr() for UTF-8,
     * UTF8::substr() for every other encoding) and then compared with UTF8::strcmp().
     *
     * @see http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($encoding !== 'UTF-8') {
            return self::strcmp(
                (string) self::substr($str1, 0, $len, $encoding),
                (string) self::substr($str2, 0, $len, $encoding)
            );
        }

        return self::strcmp(
            (string) \mb_substr($str1, 0, $len),
            (string) \mb_substr($str2, 0, $len)
        );
    }
9146
9147
    /**
     * Search a string for any character out of a given set.
     *
     * The character list is turned into a regex character class via
     * UTF8::rxClass(); the haystack is returned from the first match onwards.
     *
     * @see http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>This parameter is case-sensitive.</p>
     *
     * @return false|string string starting from the character found, or false if it is not found
     *                      (also false when haystack or char_list is empty)
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($char_list === '' || $haystack === '') {
            return false;
        }

        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $matches)) {
            return false;
        }

        // locate the matched character (byte-wise) and return the rest of the haystack from there
        $start = (int) \strpos($haystack, $matches[0]);

        return \substr($haystack, $start);
    }
9169
9170
    /**
     * Find the position of the first occurrence of a substring in a string.
     *
     * Backends are tried in this order: mbstring, native strpos() for the
     * "CP850" / "ASCII" encodings, intl (UTF-8 and non-negative offset only),
     * iconv (non-negative offset only), native strpos() for pure ASCII input and
     * finally a byte-search based fallback.
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>The search offset. If it is not specified, 0 is used.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string.<br> If needle is not found it returns false (also returned when haystack or needle
     *                   is empty).
     */
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strpos($haystack, $needle, $offset);
            }

            return \mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //
        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $return_tmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        if ($offset < 0) {
            // A negative offset counts from the end of the haystack. Resolve it to an
            // absolute character offset first, so that the returned position stays
            // relative to the start of the haystack (like "mb_strpos()" does) instead
            // of being relative to the sliced tail.
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystack_tmp === false) {
            $haystack_tmp = '';
        }
        $haystack = (string) $haystack_tmp;

        // byte-wise search in the sliced haystack
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        if ($pos) {
            // convert the byte position into a character position
            return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        return $offset;
    }
9317
9318
    /**
     * Find the byte position of the first occurrence of a substring in a string.
     *
     * When the "mbstring_func_overload" support flag is set, mb_strpos() with the
     * 8-bit "CP850" encoding is used instead of the native strpos().
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to search for in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @return false|int The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found (or haystack / needle
     *                   is empty), it returns false.
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strpos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strpos($haystack, $needle, $offset);
    }
9347
9348
    /**
     * Find the last occurrence of a character in a string within another.
     *
     * Backends are tried in this order: mbstring, native strrchr() (only for the
     * "CP850" / "ASCII" encodings without $before_needle), then a position based
     * fallback that uses iconv_strrpos() when iconv is available and
     * UTF8::strrpos() otherwise. The position based fallback only looks for the
     * first character of the needle.
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or false if needle is not found
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // preferred backend: mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        // binary || ascii only (the native function has no "before needle" mode)
        if (
            $before_needle === false
            &&
            \in_array($encoding, ['CP850', 'ASCII'], true)
        ) {
            return \strrchr($haystack, $needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // position based fallback: only the first character of the needle is searched
        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }
        $needle = (string) $first_char;

        // via iconv when available, via vanilla php otherwise
        $last_pos = self::$SUPPORT['iconv'] === true
            ? \iconv_strrpos($haystack, $needle, $encoding)
            : self::strrpos($haystack, $needle, 0, $encoding);
        if ($last_pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
9470
9471
    /**
     * Reverse the order of the characters in a string.
     *
     * The string is passed through UTF8::emoji_encode() before and
     * UTF8::emoji_decode() after the reversal. For UTF-8 the "grapheme_*"
     * functions are used when intl is available, "mb_*" otherwise; every other
     * encoding goes through UTF8::strlen() / UTF8::substr().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the string with characters in the reverse sequence
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::emoji_encode($str, true);
        $result = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            for ($index = (int) self::strlen($str, $encoding) - 1; $index >= 0; --$index) {
                $char = self::substr($str, $index, 1, $encoding);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($index = (int) \grapheme_strlen($str) - 1; $index >= 0; --$index) {
                $char = \grapheme_substr($str, $index, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } else {
            for ($index = (int) \mb_strlen($str) - 1; $index >= 0; --$index) {
                $char = \mb_substr($str, $index, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        }

        return self::emoji_decode($result, true);
    }
9523
9524
    /**
     * Case-insensitive search for the last occurrence of needle; returns the matching part of haystack.
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              <b>true</b>: return haystack from its beginning up to the last occurrence
     *                              of needle.<br>
     *                              <b>false</b>: return haystack from the last occurrence of needle to its end.
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      <p>The portion of haystack,<br>or false if haystack or needle is empty
     *                      or needle is not found.</p>
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // preferred path: mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        // vanilla php path
        // NOTE(review): only the first character of $needle is searched for here, while the
        // mbstring path above is handed the whole needle - confirm that this is intended.
        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }

        $last_pos = self::strripos($haystack, (string) $first_char, 0, $encoding);
        if ($last_pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
9599
9600
    /**
     * Case-insensitive search for the position of the last occurrence of needle in haystack.
     *
     * The first available strategy wins: mbstring, native "strripos()" for CP850 / ASCII,
     * "grapheme_strripos()" (UTF-8, non-negative offset, intl flagged), native "strripos()"
     * for pure ASCII input, and finally case folding plus UTF8::strrpos().
     *
     * @param string     $haystack   <p>The string to look in.</p>
     * @param int|string $needle     <p>The string to look for. A non-negative int is converted
     *                               via UTF8::chr() first.</p>
     * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the
     *                   haystack string.<br>false if haystack or needle is empty or needle is not found.</p>
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ($needle === (int) $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // 1) mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        // 2) binary || ascii only
        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strripos($haystack, $needle, $offset);
        }

        $is_utf8 = $encoding === 'UTF-8';

        if (!$is_utf8 && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // 3) intl: "grapheme_strripos()" can't handle other encodings or a negative offset
        if ($is_utf8 && $offset >= 0 && self::$SUPPORT['intl'] === true) {
            $grapheme_pos = \grapheme_strripos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
        }

        // 4) ascii-only input
        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        // 5) vanilla php: case-fold both strings, then do a case-sensitive search
        // NOTE(review): if case folding changes the character count of $haystack, the returned
        // position would refer to the folded string - confirm against UTF8::strtocasefold().
        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($folded_haystack, $folded_needle, $offset, $encoding, $clean_utf8);
    }
9710
9711
    /**
     * Byte-wise, case-insensitive search for the position of the last occurrence of needle in haystack.
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @return false|int
     *                   <p>The numeric position of the last occurrence of needle in the haystack string,
     *                   or false if haystack or needle is empty or needle is not found.</p>
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strripos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_strripos($haystack, $needle, $offset, 'CP850'); // 8-BIT
    }
9743
9744
    /**
     * Find the position of the last occurrence of a substring in a string.
     *
     * The first available strategy wins: mbstring, native "strrpos()" for CP850 / ASCII,
     * "grapheme_strrpos()" (UTF-8, non-negative offset, intl flagged), native "strrpos()"
     * for pure ASCII input, and finally a byte search whose result is converted to a character position.
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack   <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int
     *                               (a non-negative int is converted via UTF8::chr()).</p>
     * @param int        $offset     [optional] <p>May be specified to begin searching an arbitrary number of
     *                               characters into the string. Negative values will stop searching at an
     *                               arbitrary point prior to the end of the string.
     *                               </p>
     * @param string     $encoding   [optional] <p>Set the charset.</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the
     *                   haystack string.<br>false if haystack or needle is empty or needle is not found.</p>
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ($needle === (int) $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // 1) mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrpos($haystack, $needle, $offset)
                : \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        // 2) binary || ascii only
        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strrpos($haystack, $needle, $offset);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // 3) intl: "grapheme_strrpos()" can't handle a negative offset or other encodings
        if ($offset >= 0 && $encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $grapheme_pos = \grapheme_strrpos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
        }

        // 4) ascii-only input
        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        // 5) vanilla php: cut the haystack according to $offset, search byte-wise,
        //    then count the characters in front of the match
        if ($offset !== 0) {
            if ($offset > 0) {
                $section = self::substr($haystack, $offset);
            } else {
                $section = self::substr($haystack, 0, $offset);
                // a negative offset only trims the end, so nothing has to be added to the result
                $offset = 0;
            }

            if ($section !== null) {
                $haystack = $section === false ? '' : (string) $section;
            }
        }

        $byte_pos = \strrpos($haystack, $needle);
        if ($byte_pos === false) {
            return false;
        }

        /** @var false|string $prefix - needed for PhpStan (stubs error) */
        $prefix = \substr($haystack, 0, $byte_pos);
        if ($prefix === false) {
            return false;
        }

        return $offset + (int) self::strlen($prefix);
    }
9882
9883
    /**
     * Byte-wise search for the position of the last occurrence of a substring in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                         into the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *                         </p>
     *
     * @return false|int
     *                   <p>The numeric position of the last occurrence of needle in the haystack string,
     *                   or false if haystack or needle is empty or needle is not found.</p>
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strrpos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_strrpos($haystack, $needle, $offset, 'CP850'); // 8-BIT
    }
9915
9916
    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained
     * within a given mask.
     *
     * When an offset or a length is given, the string is cut down to that section before it is measured.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $mask     <p>The mask of chars</p>
     * @param int    $offset   [optional] <p>Start of the section of $str that is examined.</p>
     * @param int    $length   [optional] <p>Length of the section of $str that is examined.</p>
     * @param string $encoding [optional] <p>Set the charset.</p>
     *
     * @return false|int
     *                   <p>The length of the matching initial segment; 0 if the (cut) string or the mask
     *                   is empty or nothing matches.</p>
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // restrict $str to the requested section first
        if ($offset !== 0 || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length === null) {
                $str = (string) \mb_substr($str, $offset);
            } else {
                $str = (string) \mb_substr($str, $offset, $length);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        // match the leading run of mask characters and measure it in characters
        $found = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
9959
9960
    /**
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
     *
     * The first available strategy wins: mbstring, native "strstr()" for CP850 / ASCII,
     * "grapheme_strstr()" (UTF-8, intl flagged), native "strstr()" for pure ASCII input,
     * and finally a regular expression.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, strstr() returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      A sub-string,<br>or <strong>false</strong> if haystack or needle is empty
     *                      or needle is not found
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // 1) mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        // 2) binary || ascii only
        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strstr($haystack, $needle, $before_needle);
        }

        $is_utf8 = $encoding === 'UTF-8';

        if (!$is_utf8 && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // 3) intl: "grapheme_strstr()" can't handle other encodings
        if ($is_utf8 && self::$SUPPORT['intl'] === true) {
            $grapheme_result = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($grapheme_result !== false) {
                return $grapheme_result;
            }
        }

        // 4) ascii-only input
        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        // 5) vanilla php: capture everything in front of the first occurrence of the needle
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $parts);

        if (!isset($parts[1])) {
            return false;
        }

        return $before_needle
            ? $parts[1]
            : self::substr($haystack, (int) self::strlen($parts[1]));
    }
10068
10069
    /**
     * Byte-wise search for the first occurrence of a string within another.
     *
     * @param string $haystack      <p>
     *                              The string from which to get the first occurrence
     *                              of needle.
     *                              </p>
     * @param string $needle        <p>
     *                              The string to find in haystack.
     *                              </p>
     * @param bool   $before_needle [optional] <p>
     *                              <b>true</b>: return haystack from its beginning up to the first occurrence
     *                              of needle.<br>
     *                              <b>false</b>: return haystack from the first occurrence of needle to its end.
     *                              </p>
     *
     * @return false|string
     *                      <p>The portion of haystack,
     *                      or false if haystack or needle is empty or needle is not found.</p>
     */
    public static function strstr_in_byte(
        string $haystack,
        string $needle,
        bool $before_needle = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strstr($haystack, $needle, $before_needle);
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_strstr($haystack, $needle, $before_needle, 'CP850'); // 8-BIT
    }
10108
10109
    /**
     * Unicode transformation for case-less matching.
     *
     * The string is passed through UTF8::fixStrCaseHelper() and then lower- or upper-cased, either
     * directly via "mb_*" (UTF-8 without a language) or via UTF8::strtolower() / UTF8::strtoupper().
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str        <p>The input string.</p>
     * @param bool        $full       [optional] <p>
     *                                <b>true</b>, replace full case folding chars (default)<br>
     *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                                </p>
     * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding   [optional] <p>Set the charset.</p>
     * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower      [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                                is for some languages better ...</p>
     *
     * @return string
     *                <p>The case-folded string (an empty string for empty input).</p>
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $clean_utf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            // drop invalid byte sequences before any case mapping is applied
            $str = self::clean($str);
        }

        $str = self::fixStrCaseHelper($str, $lower, $full);

        // $lower is untyped, so only a strict boolean true selects the lowercase variant
        $to_lowercase = $lower === true;

        // plain UTF-8 without a language: call mbstring directly
        if ($lang === null && $encoding === 'UTF-8') {
            return $to_lowercase ? \mb_strtolower($str) : \mb_strtoupper($str);
        }

        return $to_lowercase
            ? self::strtolower($str, $encoding, false, $lang)
            : self::strtoupper($str, $encoding, false, $lang);
    }
10161
10162
    /**
     * Make a string lowercase.
     *
     * With a language set and intl flagged as available, the conversion is done by the
     * "<lang>-Lower" transliterator ("Any-Lower" plus a warning if that one is unknown);
     * in every other case "mb_strtolower()" is used.
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                           <p>The string being lowercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $str = (string) $str;
        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            // drop invalid byte sequences before the case mapping is applied
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length === true) {
            $str = self::fixStrCaseHelper($str, true);
        }

        $has_lang = $lang !== null;

        // fast path: plain UTF-8 without a language
        if (!$has_lang && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (!$has_lang) {
            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        if (self::$SUPPORT['intl'] !== true) {
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        // the list of transliterator ids is loaded on first use
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $transliterator_id = $lang . '-Lower';
        if (!\in_array($transliterator_id, self::$INTL_TRANSLITERATOR_LIST, true)) {
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

            $transliterator_id = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        return (string) \transliterator_transliterate($transliterator_id, $str);
    }
10231
10232
    /**
     * Make a string uppercase.
     *
     * With a language set and intl flagged as available, the conversion is done by the
     * "<lang>-Upper" transliterator ("Any-Upper" plus a warning if that one is unknown);
     * in every other case "mb_strtoupper()" is used.
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                           <p>The string being uppercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === run the string through
     *                                                   UTF8::fixStrCaseHelper() first, to try to keep the
     *                                                   string length</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            // drop invalid byte sequences before the case mapping is applied
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length === true) {
            $str = self::fixStrCaseHelper($str, false);
        }

        // fast path: plain UTF-8 without a language
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // the list of transliterator ids is loaded on first use
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Upper';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl IS available on this branch: the language-specific transliterator is what is
                    // missing, so the warning says that (same wording as UTF8::strtolower())
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    $language_code = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($language_code, $str);
            }

            // the warning has to name this method, not UTF8::strtolower()
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
10301
10302
    /**
10303
     * Translate characters or replace sub-strings.
10304
     *
10305
     * @see http://php.net/manual/en/function.strtr.php
10306
     *
10307
     * @param string          $str  <p>The string being translated.</p>
10308
     * @param string|string[] $from <p>The string replacing from.</p>
10309
     * @param string|string[] $to   [optional] <p>The string to translate to.</p>
10310
     *
10311
     * @return string
10312
     *                <p>This function returns a copy of str, translating all occurrences of each character in "from" to the
10313
     *                corresponding character in "to".</p>
10314
     */
10315
    public static function strtr(string $str, $from, $to = ''): string
    {
        // Nothing to translate in an empty input.
        if ($str === '') {
            return '';
        }

        // Search and replacement are identical: the result would equal the input.
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            // Split both sides into character lists (via self::str_split()).
            $from_chars = self::str_split($from);
            $to_chars = self::str_split($to);
            $count_from = \count($from_chars);
            $count_to = \count($to_chars);

            // array_combine() needs lists of equal size, so the longer one is truncated.
            if ($count_from > $count_to) {
                $from_chars = \array_slice($from_chars, 0, $count_to);
            } elseif ($count_from < $count_to) {
                $to_chars = \array_slice($to_chars, 0, $count_from);
            }

            // Keep the combined map in its own variable, so that the error message
            // below can still show the original search characters.
            $pairs = \array_combine($from_chars, $to_chars);
            if ($pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from_chars, true) . ' | to: ' . \print_r($to_chars, true) . ')');
            }

            $from = $pairs;
        }

        // No replacement given and a plain string to search for: remove every occurrence of it.
        if (\is_string($from)) {
            return \str_replace($from, '', $str);
        }

        // $from is a "search => replacement" map at this point.
        return \strtr($str, $from);
    }
10350
10351
    /**
10352
     * Return the width of a string.
10353
     *
10354
     * @param string $str        <p>The input string.</p>
10355
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
10356
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10357
     *
10358
     * @return int
10359
     */
10360
    public static function strwidth(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): int {
        if ($str === '') {
            return 0;
        }

        // Only encodings other than the two well-known ones are normalized.
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            $str = self::clean($str);
        }

        // Preferred path: let mbstring compute the width.
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        // Vanilla PHP path: work on UTF-8 and count the wide characters separately.
        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // Strip every character matched by the pattern; preg_replace() reports
        // how many were removed through its by-reference counter.
        $wide_count = 0;
        $remaining = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wide_count);

        // Each removed character counts twice, every remaining one once.
        return ($wide_count * 2) + (int) self::strlen($remaining, 'UTF-8');
    }
10404
10405
    /**
10406
     * Get part of a string.
10407
     *
10408
     * @see http://php.net/manual/en/function.mb-substr.php
10409
     *
10410
     * @param string $str        <p>The string being checked.</p>
10411
     * @param int    $offset     <p>The first position used in str.</p>
10412
     * @param int    $length     [optional] <p>The maximum length of the returned string.</p>
10413
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
10414
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10415
     *
10416
     * @return false|string
10417
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
10418
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
10419
     *                      characters long, <b>FALSE</b> will be returned.
10420
     */
10421
    public static function substr(
        string $str,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        if ($clean_utf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    return \mb_substr($str, $offset);
                }

                return \mb_substr($str, $offset, $length);
            }

            // Delegate to mbstring with the requested encoding. Calling
            // self::substr() again with the same arguments would end up in
            // this very branch again and never terminate.
            return \mb_substr($str, $offset, $length, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            if ($length === null) {
                return \substr($str, $offset);
            }

            return \substr($str, $offset, $length);
        }

        // otherwise we need the string-length
        $str_length = 0;
        if ($offset || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // empty string: the offset points right behind the last character
        // ($length can not be 0 here, so only "null" needs to be checked)
        if ($offset === $str_length && $length === null) {
            return '';
        }

        // impossible: the offset is behind the end of the string
        if ($offset && $offset > $str_length) {
            return '';
        }

        if ($length === null) {
            $length = (int) $str_length;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_substr($str, $offset, $length);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $return_tmp = \iconv_substr($str, $offset, $length);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split to array, and remove invalid characters
        $array = self::str_split($str);

        // extract relevant part, and join to make string again
        return \implode('', \array_slice($array, $offset, $length));
    }
10565
10566
    /**
10567
     * Binary-safe comparison of two strings from an offset, up to a length of characters.
10568
     *
10569
     * @param string   $str1               <p>The main string being compared.</p>
10570
     * @param string   $str2               <p>The secondary string being compared.</p>
10571
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
10572
     *                                     counting from the end of the string.</p>
10573
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
10574
     *                                     of the length of the str compared to the length of main_str less the
10575
     *                                     offset.</p>
10576
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
10577
     *                                     insensitive.</p>
10578
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
10579
     *
10580
     * @return int
10581
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
10582
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
10583
     *             <strong>0</strong> if they are equal
10584
     */
10585
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        // Only cut the strings if the caller asked for a window at all.
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }

                // $str2 is limited to the length of the extracted part of $str1.
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);

                // Measure $str1 in the same encoding that is used to cut both
                // strings, otherwise $str2 could be cut at the wrong position.
                $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1, $encoding), $encoding);
            }
        }

        if ($case_insensitivity === true) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
10619
10620
    /**
10621
     * Count the number of substring occurrences.
10622
     *
10623
     * @see http://php.net/manual/en/function.substr-count.php
10624
     *
10625
     * @param string $haystack   <p>The string to search in.</p>
10626
     * @param string $needle     <p>The substring to search for.</p>
10627
     * @param int    $offset     [optional] <p>The offset where to start counting.</p>
10628
     * @param int    $length     [optional] <p>
10629
     *                           The maximum length after the specified offset to search for the
10630
     *                           substring. It outputs a warning if the offset plus the length is
10631
     *                           greater than the haystack length.
10632
     *                           </p>
10633
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
10634
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10635
     *
10636
     * @return false|int this functions returns an integer or false if there isn't a string
10637
     */
10638
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Without a haystack or a needle there is nothing that could be counted.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // An explicit zero-length window can not contain any match.
        if ($length === 0) {
            return 0;
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $is_utf8 = $encoding === 'UTF-8';

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // Reduce the haystack to the requested window first.
        if ($offset || $length > 0) {
            if ($length === null) {
                $haystack_length = self::strlen($haystack, $encoding);
                if ($haystack_length === false) {
                    return false;
                }

                $length = (int) $haystack_length;
            }

            $haystack = $is_utf8
                ? (string) \mb_substr($haystack, $offset, $length)
                : (string) \mb_substr($haystack, $offset, $length, $encoding);
        }

        $has_mbstring = self::$SUPPORT['mbstring'];

        if (!$is_utf8 && $has_mbstring === false) {
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if ($has_mbstring === true) {
            return $is_utf8
                ? \mb_substr_count($haystack, $needle)
                : \mb_substr_count($haystack, $needle, $encoding);
        }

        // Last resort: count the matches of the quoted needle via PCRE.
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
10701
10702
    /**
10703
     * Count the number of substring occurrences.
10704
     *
10705
     * @param string $haystack <p>
10706
     *                         The string being checked.
10707
     *                         </p>
10708
     * @param string $needle   <p>
10709
     *                         The string being found.
10710
     *                         </p>
10711
     * @param int    $offset   [optional] <p>
10712
     *                         The offset where to start counting
10713
     *                         </p>
10714
     * @param int    $length   [optional] <p>
10715
     *                         The maximum length after the specified offset to search for the
10716
     *                         substring. It outputs a warning if the offset plus the length is
10717
     *                         greater than the haystack length.
10718
     *                         </p>
10719
     *
10720
     * @return false|int the number of times the
10721
     *                   needle substring occurs in the
10722
     *                   haystack string
10723
     */
10724
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        $use_overload = self::$SUPPORT['mbstring_func_overload'] === true;

        // With function overloading the window is cut manually, because the
        // "mb_substr_count()" call below is made without offset and length.
        if ($use_overload && ($offset || $length !== null)) {
            if ($length === null) {
                $haystack_length = self::strlen($haystack);
                if ($haystack_length === false) {
                    return false;
                }

                $length = (int) $haystack_length;
            }

            $window_is_empty = $length !== 0
                               && $offset !== 0
                               && ($length + $offset) <= 0
                               && Bootup::is_php('7.1') === false; // output from "substr_count()" have changed in PHP 7.1
            if ($window_is_empty) {
                return false;
            }

            /** @var false|string $window - needed for PhpStan (stubs error) */
            $window = \substr($haystack, $offset, $length);
            $haystack = $window === false ? '' : (string) $window;
        }

        if ($use_overload) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
        }

        // Plain byte-wise counting via the native function.
        return $length === null
            ? \substr_count($haystack, $needle, $offset)
            : \substr_count($haystack, $needle, $offset, $length);
    }
10780
10781
    /**
10782
     * Returns the number of occurrences of $substring in the given string.
10783
     * By default, the comparison is case-sensitive, but can be made insensitive
10784
     * by setting $case_sensitive to false.
10785
     *
10786
     * @param string $str            <p>The input string.</p>
10787
     * @param string $substring      <p>The substring to search for.</p>
10788
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
10789
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
10790
     *
10791
     * @return int
10792
     */
10793
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $case_sensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($str === '' || $substring === '') {
            return 0;
        }

        // Any other encoding than the literal "UTF-8" is normalized and then
        // handed over to mbstring explicitly.
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            if (!$case_sensitive) {
                // Case-fold both sides, so that only the characters itself matter.
                $str = self::strtocasefold($str, true, false, $encoding, null, false);
                $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
            }

            return (int) \mb_substr_count($str, $substring, $encoding);
        }

        if (!$case_sensitive) {
            // UTF-8 shortcut: compare the upper-cased variants.
            $str = \mb_strtoupper($str);
            $substring = \mb_strtoupper($substring);
        }

        return (int) \mb_substr_count($str, $substring);
    }
10826
10827
    /**
10828
     * Removes a prefix ($needle) from the beginning of the string ($haystack), case-insensitive.
10829
     *
10830
     * @param string $haystack <p>The string to search in.</p>
10831
     * @param string $needle   <p>The substring to search for.</p>
10832
     *
10833
     * @return string return the sub-string
10834
     */
10835
    public static function substr_ileft(string $haystack, string $needle): string
    {
        if ($haystack === '') {
            return '';
        }

        // Nothing to remove: no needle given, or the haystack does not start
        // with it (checked case-insensitive via self::str_istarts_with()).
        if ($needle === '' || self::str_istarts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // Drop as many leading characters as the needle is long.
        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
10851
10852
    /**
10853
     * Get part of a string process in bytes.
10854
     *
10855
     * @param string $str    <p>The string being checked.</p>
10856
     * @param int    $offset <p>The first position used in str.</p>
10857
     * @param int    $length [optional] <p>The maximum length of the returned string.</p>
10858
     *
10859
     * @return false|string
10860
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
10861
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
10862
     *                      characters long, <b>FALSE</b> will be returned.
10863
     */
10864
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty string
        if ($length === 0 || $str === '') {
            return '';
        }

        // whole string
        if ($length === null && !$offset) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        // Without an explicit length a very large value is passed instead of "null".
        $byte_length = $length ?? 2147483647;

        return \substr($str, $offset, $byte_length);
    }
10883
10884
    /**
10885
     * Removes a suffix ($needle) from the end of the string ($haystack), case-insensitive.
10886
     *
10887
     * @param string $haystack <p>The string to search in.</p>
10888
     * @param string $needle   <p>The substring to search for.</p>
10889
     *
10890
     * @return string return the sub-string
10891
     */
10892
    public static function substr_iright(string $haystack, string $needle): string
    {
        if ($haystack === '') {
            return '';
        }

        // Nothing to remove: no needle given, or the haystack does not end
        // with it (checked case-insensitive via self::str_iends_with()).
        if ($needle === '' || self::str_iends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // Keep everything except the last "length of needle" characters.
        $haystack_length = (int) self::strlen($haystack);
        $needle_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $haystack_length - $needle_length);
    }
10908
10909
    /**
10910
     * Removes a prefix ($needle) from the beginning of the string ($haystack).
10911
     *
10912
     * @param string $haystack <p>The string to search in.</p>
10913
     * @param string $needle   <p>The substring to search for.</p>
10914
     *
10915
     * @return string return the sub-string
10916
     */
10917
    public static function substr_left(string $haystack, string $needle): string
    {
        if ($haystack === '') {
            return '';
        }

        // Nothing to remove: no needle given, or the haystack does not start with it.
        if ($needle === '' || self::str_starts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // Drop as many leading characters as the needle is long.
        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
10933
10934
    /**
10935
     * Replace text within a portion of a string.
10936
     *
10937
     * source: https://gist.github.com/stemar/8287074
10938
     *
10939
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
10940
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
10941
     * @param int|int[]       $offset      <p>
10942
     *                                     If start is positive, the replacing will begin at the start'th offset
10943
     *                                     into string.
10944
     *                                     <br><br>
10945
     *                                     If start is negative, the replacing will begin at the start'th character
10946
     *                                     from the end of string.
10947
     *                                     </p>
10948
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
10949
     *                                     portion of string which is to be replaced. If it is negative, it
10950
     *                                     represents the number of characters from the end of string at which to
10951
     *                                     stop replacing. If it is not given, then it will default to strlen(
10952
     *                                     string ); i.e. end the replacing at the end of string. Of course, if
10953
     *                                     length is zero then this function will have the effect of inserting
10954
     *                                     replacement into string at the given start offset.</p>
10955
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
10956
     *
10957
     * @return string|string[] The result string is returned. If string is an array then array is returned.
10958
     */
10959
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        //
        // array mode: normalize all parameters to lists and process every element on its own
        //

        if (\is_array($str) === true) {
            $num = \count($str);

            // the replacement
            if (\is_array($replacement) === true) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset (non-integer values are replaced by 0)
            if (\is_array($offset) === true) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$value_tmp) {
                    $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
                }
                unset($value_tmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length (a missing length becomes 0 for every element,
            // non-integer values are replaced by the number of strings)
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length) === true) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$value_tmp_V2) {
                    $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
                }
                unset($value_tmp_V2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call: a closure is used, so that the requested encoding
            // is passed on to every element instead of falling back to the default
            $replace_single = static function ($single_str, $single_replacement, $single_offset, $single_length) use ($encoding) {
                return self::substr_replace($single_str, $single_replacement, $single_offset, $single_length, $encoding);
            };

            return \array_map($replace_single, $str, $replacement, $offset, $length);
        }

        //
        // string mode
        //

        // only the first replacement is used for a single input string
        if (\is_array($replacement) === true) {
            if ($replacement !== []) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length) === true) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset) === true) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into the range "0 ... string length"
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // a negative length is counted from the end of the string,
            // a missing or too large length means "up to the end"
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // head + replacement + tail
            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into lists of characters
        \preg_match_all('/./us', $str, $str_matches);
        \preg_match_all('/./us', $replacement, $replacement_matches);

        if ($length === null) {
            $length_tmp = self::strlen($str, $encoding);
            if ($length_tmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $length_tmp;
        }

        // replace the requested range of characters and join the list again
        \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);

        return \implode('', $str_matches[0]);
    }
11084
11085
    /**
     * Strips the suffix ($needle) from the end of the string ($haystack), if the haystack ends with it.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix that should be removed.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if it does not end with
     *                the needle (an empty haystack always results in an empty string).</p>
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        if ($haystack === '') {
            return '';
        }

        if ($needle === '') {
            return $haystack;
        }

        // the suffix comparison itself is done byte-wise
        $ends_with_needle = \substr($haystack, -\strlen($needle)) === $needle;
        if ($ends_with_needle === false) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            // number of characters that are left once the suffix is gone
            $keep = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keep);
        }

        $keep = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $keep, $encoding);
    }
11126
11127
    /**
     * Returns a case swapped version of the string.
     *
     * For UTF-8 input the swap is done character by character, so case mappings that change the
     * byte length of a character (e.g. "ı" => "I") can not shift and corrupt the following characters.
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string each character's case swapped
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            // invalid byte sequences can not be split into characters below
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            $lower_str = \mb_strtolower($str, 'UTF-8');
            $upper_str = \mb_strtoupper($str, 'UTF-8');

            $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);
            $lower_chars = \preg_split('//u', $lower_str, -1, \PREG_SPLIT_NO_EMPTY);
            $upper_chars = \preg_split('//u', $upper_str, -1, \PREG_SPLIT_NO_EMPTY);

            if ($chars === false || $lower_chars === false || $upper_chars === false) {
                // not splittable (e.g. invalid UTF-8): keep the former byte-wise behavior
                return (string) (\mb_strtolower($str) ^ \mb_strtoupper($str) ^ $str);
            }

            // If no mapping expanded a character (e.g. "ß" => "SS"), the three lists line up
            // index by index and the (possibly context-sensitive) whole-string mappings can be used.
            $char_count = \count($chars);
            $aligned = \count($lower_chars) === $char_count && \count($upper_chars) === $char_count;

            $swapped = '';
            foreach ($chars as $index => $char) {
                $lower = $aligned ? $lower_chars[$index] : \mb_strtolower($char, 'UTF-8');

                if ($lower !== $char) {
                    // the char was not lowercase -> use its lowercase form
                    $swapped .= $lower;

                    continue;
                }

                // lowercase (or caseless) char -> use its uppercase form
                $swapped .= $aligned ? $upper_chars[$index] : \mb_strtoupper($char, 'UTF-8');
            }

            return $swapped;
        }

        return (string) (self::strtolower($str, $encoding) ^ self::strtoupper($str, $encoding) ^ $str);
    }
11154
11155
    /**
     * Checks whether symfony-polyfills are used.
     *
     * A polyfill is assumed when a function of an extension ("mb_strlen" for "mbstring",
     * "iconv" for "iconv") exists although the extension itself is not loaded.
     *
     * @return bool
     *              <strong>true</strong> if in use, <strong>false</strong> otherwise
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstring_is_polyfilled = \extension_loaded('mbstring') === false
                                  &&
                                  \function_exists('mb_strlen');

        $iconv_is_polyfilled = \extension_loaded('iconv') === false
                               &&
                               \function_exists('iconv');

        return $mbstring_is_polyfilled || $iconv_is_polyfilled;
    }
11178
11179
    /**
     * Replaces every tab in the string with a run of spaces.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length [optional] <p>Number of spaces that replace one tab.</p>
     *
     * @return string
     */
    public static function tabs_to_spaces(string $str, int $tab_length = 4): string
    {
        // one run of spaces per tab
        $indentation = \str_repeat(' ', $tab_length);

        return \str_replace("\t", $indentation, $str);
    }
11197
11198
    /**
     * Converts the first character of each word in the string to uppercase
     * and all other chars to lowercase.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>A string with all characters of $str being title-cased.</p>
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        // a language or the "keep the length" option is handed over to "str_titleize()"
        if ($lang !== null || $try_to_keep_the_string_length !== false) {
            return self::str_titleize(
                $str,
                null,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length,
                false
            );
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_convert_case($str, \MB_CASE_TITLE, $encoding);
        }

        return \mb_convert_case($str, \MB_CASE_TITLE);
    }
11248
11249
    /**
     * alias for "UTF8::to_ascii()"
     *
     * @param string $str       <p>The input string.</p>
     * @param string $subst_chr [optional] <p>Passed on as the second argument of "UTF8::to_ascii()".</p>
     * @param bool   $strict    [optional] <p>Passed on as the third argument of "UTF8::to_ascii()".</p>
     *
     * @return string
     *
     * @see UTF8::to_ascii()
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
     */
    public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
11268
11269
    /**
     * alias for "UTF8::to_iso8859()"
     *
     * @param string|string[] $str <p>Any string or array.</p>
     *
     * @return string|string[]
     *
     * @see UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function toIso8859($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
11283
11284
    /**
     * alias for "UTF8::to_iso8859()"
     *
     * @param string|string[] $str <p>Any string or array.</p>
     *
     * @return string|string[]
     *
     * @see UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function toLatin1($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
11298
11299
    /**
     * alias for "UTF8::to_utf8()"
     *
     * @param string|string[] $str <p>Any string or array.</p>
     *
     * @return string|string[]
     *
     * @see UTF8::to_utf8()
     * @deprecated <p>please use "UTF8::to_utf8()"</p>
     */
    public static function toUTF8($str)
    {
        $converted = self::to_utf8($str);

        return $converted;
    }
11313
11314
    /**
     * Convert a string into ASCII (delegates to "ASCII::to_transliterate()").
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @return string
     */
    public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
    {
        $transliterated = ASCII::to_transliterate($str, $unknown, $strict);

        return $transliterated;
    }
11331
11332
    /**
     * Converts a value into a boolean, based on its string representation.
     *
     * <ul>
     * <li>"true", "1", "on", "yes" (case-insensitive) => true</li>
     * <li>"false", "0", "off", "no" (case-insensitive) and the empty string => false</li>
     * <li>other numeric strings => true, if the number is greater than zero</li>
     * <li>everything else => boolean value of the trimmed string</li>
     * </ul>
     *
     * @param mixed $str <p>The value, it is cast to a string first.</p>
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        $value = (string) $str;

        if ($value === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        //
        // all keywords are lowercase, so one lowercase lookup covers the exact match too
        $keyword = \strtolower($value);

        if (\in_array($keyword, ['true', '1', 'on', 'yes'], true)) {
            return true;
        }

        if (\in_array($keyword, ['false', '0', 'off', 'no'], true)) {
            return false;
        }

        if (\is_numeric($value)) {
            return (float) $value > 0;
        }

        return (bool) \trim($value);
    }
11373
11374
    /**
     * Convert given string to safe filename (and keep string case).
     *
     * This is a wrapper around "ASCII::to_filename()".
     *
     * @param string $str               <p>The input string.</p>
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
     *                                  simply replaced with hyphen.
     * @param string $fallback_char     <p>Passed on as the third argument of "ASCII::to_filename()".</p>
     *
     * @return string
     */
    public static function to_filename(
        string $str,
        bool $use_transliterate = false,
        string $fallback_char = '-'
    ): string {
        $filename = ASCII::to_filename($str, $use_transliterate, $fallback_char);

        return $filename;
    }
11395
11396
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * Arrays are converted recursively, element by element, and keep their keys.
     *
     * @param string|string[] $str <p>Any string or array.</p>
     *
     * @return string|string[]
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        $str = (string) $str;

        return $str === '' ? '' : self::utf8_decode($str);
    }
11420
11421
    /**
     * alias for "UTF8::to_iso8859()"
     *
     * @param string|string[] $str <p>Any string or array.</p>
     *
     * @return string|string[]
     *
     * @see UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function to_latin1($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
11435
11436
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decode UTF-8 codepoints and Unicode escape sequences.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * @param string|string[] $str                        <p>Any string or array.</p>
     * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @return string|string[] the UTF-8 encoded string
     */
    public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
    {
        if (\is_array($str) === true) {
            // convert every element, the keys stay untouched
            foreach ($str as $key => $value) {
                $str[$key] = self::to_utf8($value, $decode_html_entity_to_utf8);
            }

            return $str;
        }

        $str = (string) $str;
        if ($str === '') {
            return $str;
        }

        $byte_count = \strlen($str);
        $converted = '';
        $pos = 0;

        while ($pos < $byte_count) {
            $lead = $str[$pos];
            $lead_ord = \ord($lead);

            // 0x00 - 0x7F: it doesn't need conversion
            if ($lead_ord < 0x80) {
                $converted .= $lead;
                ++$pos;

                continue;
            }

            // how many continuation bytes does the lead byte announce? (0 === needs conversion anyway)
            if ($lead_ord < 0xC0) {
                $tail_length = 0; // lone continuation byte
            } elseif ($lead_ord <= 0xDF) {
                $tail_length = 1; // looks like 2 bytes UTF8
            } elseif ($lead_ord <= 0xEF) {
                $tail_length = 2; // looks like 3 bytes UTF8
            } elseif ($lead_ord <= 0xF7) {
                $tail_length = 3; // looks like 4 bytes UTF8
            } else {
                $tail_length = 0; // doesn't look like UTF8, but should be converted
            }

            // every announced byte must be a continuation byte (0x80 - 0xBF),
            // a byte behind the end of the string counts as "\x00" and therefore fails
            $looks_like_utf8 = $tail_length > 0;
            for ($offset = 1; $looks_like_utf8 && $offset <= $tail_length; ++$offset) {
                $next_ord = $pos + $offset < $byte_count ? \ord($str[$pos + $offset]) : 0x00;
                $looks_like_utf8 = $next_ord >= 0x80 && $next_ord <= 0xBF;
            }

            if ($looks_like_utf8) {
                // yeah, almost sure it's UTF8 already
                $converted .= \substr($str, $pos, $tail_length + 1);
                $pos += $tail_length + 1;
            } else {
                // not valid UTF8 - convert only the lead byte and re-check the following bytes
                $converted .= self::to_utf8_convert_helper($lead);
                ++$pos;
            }
        }

        // decode unicode escape sequences + unicode surrogate pairs
        $converted = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $hits
             *
             * @return string
             */
            static function (array $hits): string {
                if (isset($hits[3])) {
                    // single "\uXXXX" escape sequence
                    $code_point = (int) \hexdec($hits[3]);
                } else {
                    // surrogate pair: http://unicode.org/faq/utf_bom.html#utf16-4
                    $high = (int) \hexdec($hits[1]);
                    $low = (int) \hexdec($hits[2]);
                    $code_point = 0x10000 + (($high - 0xD800) << 10) + ($low - 0xDC00);
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($code_point < 0x80) {
                    return (string) self::chr($code_point);
                }

                if ($code_point < 0xA0) {
                    /** @noinspection UnnecessaryCastingInspection */
                    return (string) self::chr(0xC0 | ($code_point >> 6))
                           . (string) self::chr(0x80 | ($code_point & 0x3F));
                }

                return self::decimal_to_chr($code_point);
            },
            $converted
        );

        // "null" signals a regex error
        if ($converted === null) {
            return '';
        }

        // decode UTF-8 codepoints
        if ($decode_html_entity_to_utf8 === true) {
            $converted = self::html_entity_decode($converted);
        }

        return $converted;
    }
11569
11570
    /**
     * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
     *
     * INFO: This is slower than "trim()"
     *
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
     * but the check for ASCII (7-Bit) costs more time than we can save here.
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars [optional] <p>Optional characters to be stripped, "null" or an empty string
     *                           strips whitespace</p>
     *
     * @return string the trimmed string
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Explicit comparison instead of a truthiness check: the char-list "0" is falsy in PHP,
        // but it is a perfectly valid list of characters to strip.
        $has_char_list = $chars !== null && $chars !== '';

        if (self::$SUPPORT['mbstring'] === true) {
            if ($has_char_list) {
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                $pattern = "^[{$chars}]+|[{$chars}]+\$";
            } else {
                $pattern = '^[\\s]+|[\\s]+$';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        if ($has_char_list) {
            // the pattern is wrapped into "/" delimiters below, so they must be quoted too
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
11611
11612
    /**
     * Makes string's first char uppercase.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        // the plain "mb_*" functions are used as long as no special option is requested
        $plain_mb_is_enough = $lang === null && $try_to_keep_the_string_length === false;

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $rest = (string) self::substr($str, 1, null, $encoding);

            if ($plain_mb_is_enough) {
                $first = \mb_strtoupper((string) \mb_substr($str, 0, 1, $encoding), $encoding);
            } else {
                $first = self::strtoupper(
                    (string) self::substr($str, 0, 1, $encoding),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            }

            return $first . $rest;
        }

        $rest = (string) \mb_substr($str, 1);
        $first_char = (string) \mb_substr($str, 0, 1);

        $first = $plain_mb_is_enough
            ? \mb_strtoupper($first_char)
            : self::strtoupper($first_char, $encoding, false, $lang, $try_to_keep_the_string_length);

        return $first . $rest;
    }
11681
11682
    /**
     * alias for "UTF8::ucfirst()"
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     *
     * @see UTF8::ucfirst()
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
     */
    public static function ucword(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        $result = self::ucfirst($str, $encoding, $clean_utf8);

        return $result;
    }
11701
11702
    /**
     * Uppercase for all words in the string.
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
     * @param string   $char_list  [optional] <p>Additional chars that contains to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // strict comparison: the string "0" is falsy in PHP, but it is not empty
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        // the native function is only an option, if neither a char-list nor exceptions are given
        // (compared against '' so that e.g. the char-list "0" is not ignored)
        $use_php_default_functions = ($char_list . \implode('', $exceptions)) === '';

        if (
            $use_php_default_functions === true
            &&
            ASCII::is_ascii($str) === true
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        foreach ($words as $word) {
            // skip only really empty parts, a part like "0" must stay in the result
            $word = (string) $word;
            if ($word === '') {
                continue;
            }

            if (
                $use_exceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words_str .= self::ucfirst($word, $encoding);
            } else {
                $words_str .= $word;
            }
        }

        return $words_str;
    }
11766
11767
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                    => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // without one of these markers there is nothing to decode
        $has_encoded_parts = false;
        foreach (['&', '%', '+', '\u'] as $marker) {
            if (\strpos($str, $marker) !== false) {
                $has_encoded_parts = true;

                break;
            }
        }

        if ($has_encoded_parts === false) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // one decoding pass, repeated until the string is stable if "$multi_decode" is requested
        do {
            $str_before = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::fix_simple_utf8(
                \urldecode(
                    self::html_entity_decode(
                        self::to_utf8($str),
                        \ENT_QUOTES | \ENT_HTML5
                    )
                )
            );
        } while ($multi_decode === true && $str_before !== $str);

        return $str;
    }
11838
11839
    /**
11840
     * Return a array with "urlencoded"-win1252 -> UTF-8
11841
     *
11842
     * @return string[]
11843
     *
11844
     * @deprecated <p>please use the "UTF8::urldecode()" function to decode a string</p>
11845
     */
11846
    public static function urldecode_fix_win1252_chars(): array
11847
    {
11848
        return [
11849 2
            '%20' => ' ',
11850
            '%21' => '!',
11851
            '%22' => '"',
11852
            '%23' => '#',
11853
            '%24' => '$',
11854
            '%25' => '%',
11855
            '%26' => '&',
11856
            '%27' => "'",
11857
            '%28' => '(',
11858
            '%29' => ')',
11859
            '%2A' => '*',
11860
            '%2B' => '+',
11861
            '%2C' => ',',
11862
            '%2D' => '-',
11863
            '%2E' => '.',
11864
            '%2F' => '/',
11865
            '%30' => '0',
11866
            '%31' => '1',
11867
            '%32' => '2',
11868
            '%33' => '3',
11869
            '%34' => '4',
11870
            '%35' => '5',
11871
            '%36' => '6',
11872
            '%37' => '7',
11873
            '%38' => '8',
11874
            '%39' => '9',
11875
            '%3A' => ':',
11876
            '%3B' => ';',
11877
            '%3C' => '<',
11878
            '%3D' => '=',
11879
            '%3E' => '>',
11880
            '%3F' => '?',
11881
            '%40' => '@',
11882
            '%41' => 'A',
11883
            '%42' => 'B',
11884
            '%43' => 'C',
11885
            '%44' => 'D',
11886
            '%45' => 'E',
11887
            '%46' => 'F',
11888
            '%47' => 'G',
11889
            '%48' => 'H',
11890
            '%49' => 'I',
11891
            '%4A' => 'J',
11892
            '%4B' => 'K',
11893
            '%4C' => 'L',
11894
            '%4D' => 'M',
11895
            '%4E' => 'N',
11896
            '%4F' => 'O',
11897
            '%50' => 'P',
11898
            '%51' => 'Q',
11899
            '%52' => 'R',
11900
            '%53' => 'S',
11901
            '%54' => 'T',
11902
            '%55' => 'U',
11903
            '%56' => 'V',
11904
            '%57' => 'W',
11905
            '%58' => 'X',
11906
            '%59' => 'Y',
11907
            '%5A' => 'Z',
11908
            '%5B' => '[',
11909
            '%5C' => '\\',
11910
            '%5D' => ']',
11911
            '%5E' => '^',
11912
            '%5F' => '_',
11913
            '%60' => '`',
11914
            '%61' => 'a',
11915
            '%62' => 'b',
11916
            '%63' => 'c',
11917
            '%64' => 'd',
11918
            '%65' => 'e',
11919
            '%66' => 'f',
11920
            '%67' => 'g',
11921
            '%68' => 'h',
11922
            '%69' => 'i',
11923
            '%6A' => 'j',
11924
            '%6B' => 'k',
11925
            '%6C' => 'l',
11926
            '%6D' => 'm',
11927
            '%6E' => 'n',
11928
            '%6F' => 'o',
11929
            '%70' => 'p',
11930
            '%71' => 'q',
11931
            '%72' => 'r',
11932
            '%73' => 's',
11933
            '%74' => 't',
11934
            '%75' => 'u',
11935
            '%76' => 'v',
11936
            '%77' => 'w',
11937
            '%78' => 'x',
11938
            '%79' => 'y',
11939
            '%7A' => 'z',
11940
            '%7B' => '{',
11941
            '%7C' => '|',
11942
            '%7D' => '}',
11943
            '%7E' => '~',
11944
            '%7F' => '',
11945
            '%80' => '`',
11946
            '%81' => '',
11947
            '%82' => '‚',
11948
            '%83' => 'ƒ',
11949
            '%84' => '„',
11950
            '%85' => '…',
11951
            '%86' => '†',
11952
            '%87' => '‡',
11953
            '%88' => 'ˆ',
11954
            '%89' => '‰',
11955
            '%8A' => 'Š',
11956
            '%8B' => '‹',
11957
            '%8C' => 'Œ',
11958
            '%8D' => '',
11959
            '%8E' => 'Ž',
11960
            '%8F' => '',
11961
            '%90' => '',
11962
            '%91' => '‘',
11963
            '%92' => '’',
11964
            '%93' => '“',
11965
            '%94' => '”',
11966
            '%95' => '•',
11967
            '%96' => '–',
11968
            '%97' => '—',
11969
            '%98' => '˜',
11970
            '%99' => '™',
11971
            '%9A' => 'š',
11972
            '%9B' => '›',
11973
            '%9C' => 'œ',
11974
            '%9D' => '',
11975
            '%9E' => 'ž',
11976
            '%9F' => 'Ÿ',
11977
            '%A0' => '',
11978
            '%A1' => '¡',
11979
            '%A2' => '¢',
11980
            '%A3' => '£',
11981
            '%A4' => '¤',
11982
            '%A5' => '¥',
11983
            '%A6' => '¦',
11984
            '%A7' => '§',
11985
            '%A8' => '¨',
11986
            '%A9' => '©',
11987
            '%AA' => 'ª',
11988
            '%AB' => '«',
11989
            '%AC' => '¬',
11990
            '%AD' => '',
11991
            '%AE' => '®',
11992
            '%AF' => '¯',
11993
            '%B0' => '°',
11994
            '%B1' => '±',
11995
            '%B2' => '²',
11996
            '%B3' => '³',
11997
            '%B4' => '´',
11998
            '%B5' => 'µ',
11999
            '%B6' => '¶',
12000
            '%B7' => '·',
12001
            '%B8' => '¸',
12002
            '%B9' => '¹',
12003
            '%BA' => 'º',
12004
            '%BB' => '»',
12005
            '%BC' => '¼',
12006
            '%BD' => '½',
12007
            '%BE' => '¾',
12008
            '%BF' => '¿',
12009
            '%C0' => 'À',
12010
            '%C1' => 'Á',
12011
            '%C2' => 'Â',
12012
            '%C3' => 'Ã',
12013
            '%C4' => 'Ä',
12014
            '%C5' => 'Å',
12015
            '%C6' => 'Æ',
12016
            '%C7' => 'Ç',
12017
            '%C8' => 'È',
12018
            '%C9' => 'É',
12019
            '%CA' => 'Ê',
12020
            '%CB' => 'Ë',
12021
            '%CC' => 'Ì',
12022
            '%CD' => 'Í',
12023
            '%CE' => 'Î',
12024
            '%CF' => 'Ï',
12025
            '%D0' => 'Ð',
12026
            '%D1' => 'Ñ',
12027
            '%D2' => 'Ò',
12028
            '%D3' => 'Ó',
12029
            '%D4' => 'Ô',
12030
            '%D5' => 'Õ',
12031
            '%D6' => 'Ö',
12032
            '%D7' => '×',
12033
            '%D8' => 'Ø',
12034
            '%D9' => 'Ù',
12035
            '%DA' => 'Ú',
12036
            '%DB' => 'Û',
12037
            '%DC' => 'Ü',
12038
            '%DD' => 'Ý',
12039
            '%DE' => 'Þ',
12040
            '%DF' => 'ß',
12041
            '%E0' => 'à',
12042
            '%E1' => 'á',
12043
            '%E2' => 'â',
12044
            '%E3' => 'ã',
12045
            '%E4' => 'ä',
12046
            '%E5' => 'å',
12047
            '%E6' => 'æ',
12048
            '%E7' => 'ç',
12049
            '%E8' => 'è',
12050
            '%E9' => 'é',
12051
            '%EA' => 'ê',
12052
            '%EB' => 'ë',
12053
            '%EC' => 'ì',
12054
            '%ED' => 'í',
12055
            '%EE' => 'î',
12056
            '%EF' => 'ï',
12057
            '%F0' => 'ð',
12058
            '%F1' => 'ñ',
12059
            '%F2' => 'ò',
12060
            '%F3' => 'ó',
12061
            '%F4' => 'ô',
12062
            '%F5' => 'õ',
12063
            '%F6' => 'ö',
12064
            '%F7' => '÷',
12065
            '%F8' => 'ø',
12066
            '%F9' => 'ù',
12067
            '%FA' => 'ú',
12068
            '%FB' => 'û',
12069
            '%FC' => 'ü',
12070
            '%FD' => 'ý',
12071
            '%FE' => 'þ',
12072
            '%FF' => 'ÿ',
12073
        ];
12074
    }
12075
12076
    /**
     * Decodes a UTF-8 string to ISO-8859-1.
     *
     * Two-byte sequences whose code point is below 256 are mapped through self::$CHR;
     * every other multi-byte sequence (including all three- and four-byte sequences)
     * is replaced by "?". A two-byte lead byte that is the very last byte of the input
     * (truncated sequence) is replaced by "?" as well.
     *
     * @param string $str             <p>The input string.</p>
     * @param bool   $keep_utf8_chars <p>
     *                                If true, the untouched input is returned whenever the decoded
     *                                result is not shorter than the input (both measured with
     *                                self::strlen()).
     *                                </p>
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // save for later comparison
        $str_backup = $str;
        $len = \strlen($str);

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $no_char_found = '?';

        // $i reads the UTF-8 bytes, $j writes the decoded bytes back into the same
        // string; $j never overtakes $i, so unread input is never overwritten.
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    if ($i + 1 >= $len) {
                        // Truncated two-byte sequence: there is no continuation byte
                        // left, so do not read past the end of the string.
                        $str[$j] = $no_char_found;

                        break;
                    }

                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $no_char_found;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // four-byte sequence: skip one byte more than the three-byte case
                    ++$i;

                // no break

                case "\xE0":
                    $str[$j] = $no_char_found;
                    $i += 2;

                    break;

                default:
                    $str[$j] = $str[$i];
            }
        }

        /** @var false|string $return - needed for PhpStan (stubs error) */
        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keep_utf8_chars === true
            &&
            (int) self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
12146
12147
    /**
     * Converts an ISO-8859-1 encoded string into UTF-8.
     *
     * An empty input, or a failed conversion (the native call / polyfill
     * returning false), yields an empty string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /** @var false|string $encoded - the polyfill maybe return false */
        $encoded = \utf8_encode($str);

        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        return $encoded === false ? '' : $encoded;
    }
12171
12172
    /**
     * Legacy alias that simply forwards to self::fix_simple_utf8().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *
     * @deprecated <p>please use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
12185
12186
    /**
     * Exposes the internal table of UTF-8 whitespace characters.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @return string[]
     *                  The content of self::$WHITESPACE_TABLE: whitespace characters as values,
     *                  keyed by the type of whitespace as defined in the URL above
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
12199
12200
    /**
     * Truncates a string after a maximum number of words.
     *
     * When the string has to be shortened, trailing whitespace of the kept part
     * is removed and $str_add_on is appended. A string that already fits (or
     * that the pattern cannot match at all) is returned unchanged.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $limit      <p>The maximum number of words; values below 1 yield an empty string.</p>
     * @param string $str_add_on <p>Suffix appended to a truncated string.</p>
     *
     * @return string
     */
    public static function words_limit(
        string $str,
        int $limit = 100,
        string $str_add_on = '…'
    ): string {
        if ($limit < 1 || $str === '') {
            return '';
        }

        // optional leading whitespace, then 1..$limit runs of "word + trailing whitespace"
        $pattern = '/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u';
        \preg_match($pattern, $str, $found);

        $was_shortened = isset($found[0])
                         &&
                         \mb_strlen($str) !== (int) \mb_strlen($found[0]);

        if ($was_shortened) {
            return \rtrim($found[0]) . $str_add_on;
        }

        return $str;
    }
12230
12231
    /**
     * Multi-byte aware variant of PHP's wordwrap().
     *
     * The input is split into characters via self::str_split() and mirrored into an
     * ASCII-only mask ("?" for a character, " " for a space, "#" for an existing
     * break). The native wordwrap() runs on that mask, and the resulting break
     * positions are then replayed onto the real characters.
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @return string
     *                <p>The given string wrapped at the specified column;
     *                an empty string if $str or $break is empty.</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($break === '' || $str === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }

        // $chars holds the real characters (an existing break is one entry),
        // $mask holds exactly one ASCII placeholder per entry of $chars.
        $chars = [];
        $mask = '';
        foreach ($segments as $segment_index => $segment) {
            if ($segment_index) {
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $mask = \wordwrap($mask, $width, '#', $cut);
        $mask_length = \mb_strlen($mask);

        $wrapped = '';
        $char_pos = 0;
        $mask_pos = -1;
        $break_pos = -1;
        while (($break_pos = \mb_strpos($mask, '#', $break_pos + 1)) !== false) {
            // copy everything up to the next break marker
            for (++$mask_pos; $mask_pos < $break_pos; ++$mask_pos) {
                $wrapped .= $chars[$char_pos];
                unset($chars[$char_pos++]);

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($mask_pos > $mask_length) {
                    break 2;
                }
            }

            // the marker replaced a space or stands for an existing break: drop that entry
            if ($chars[$char_pos] === $break || $chars[$char_pos] === ' ') {
                unset($chars[$char_pos++]);
            }

            $wrapped .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($break_pos > $mask_length) {
                break;
            }
        }

        // whatever was not consumed belongs to the last line
        return $wrapped . \implode('', $chars);
    }
12317
12318
    /**
     * Splits the string by "$delimiter" (newlines by default), wraps every part
     * with self::wordwrap() and joins the parts again.
     *
     * @param string      $str             <p>The input string.</p>
     * @param int         $width           [optional] <p>The column width.</p>
     * @param string      $break           [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool        $cut             [optional] <p>
     *                                     If the cut is set to true, the string is
     *                                     always wrapped at or before the specified width. So if you have
     *                                     a word that is larger than the given width, it is broken apart.
     *                                     </p>
     * @param bool        $add_final_break [optional] <p>
     *                                     If this flag is true, then the method will add a $break at the end
     *                                     of the result string.
     *                                     </p>
     * @param string|null $delimiter       [optional] <p>
     *                                     Split and re-join on this string instead of newlines
     *                                     ("\r\n", "\r" or "\n" for splitting, "\n" for joining).
     *                                     </p>
     *
     * @return string
     */
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $add_final_break = true,
        string $delimiter = null
    ): string {
        $lines = $delimiter === null
            ? \preg_split('/\\r\\n|\\r|\\n/', $str)
            : \explode($delimiter, $str);

        $wrapped_lines = [];
        if ($lines !== false) {
            foreach ($lines as $line) {
                $wrapped_lines[] = self::wordwrap($line, $width, $break, $cut);
            }
        }

        $glue = $delimiter ?? "\n";
        $suffix = $add_final_break ? $break : '';

        return \implode($glue, $wrapped_lines) . $suffix;
    }
12369
12370
    /**
     * Exposes the internal list of Unicode White Space characters.
     *
     * @return string[] the content of self::$WHITESPACE: numeric code point as key and White Space Character as value
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
12379
12380
    /**
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
     *
     * The string is walked byte by byte with a small state machine. Over-long
     * (non-shortest) forms, surrogates, code points above U+10FFFF, 5- and 6-byte
     * sequences, stray continuation bytes and incomplete sequences (including a
     * sequence that is cut off by the end of the string) are all rejected.
     *
     * @see http://hsivonen.iki.fi/php-utf8/
     *
     * @param string $str    <p>The string to be checked.</p>
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function is_utf8_string(string $str, bool $strict = false)
    {
        if ($str === '') {
            return true;
        }

        if ($strict === true) {
            $is_binary = self::is_binary($str, true);

            if ($is_binary && self::is_utf16($str, false) !== false) {
                return false;
            }

            if ($is_binary && self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        // NOTE(review): this shortcut is taken when pcre_utf8_support() does NOT
        // return true, yet it relies on the /u modifier - confirm the intended condition.
        if (self::pcre_utf8_support() !== true) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^./us', $str, $ar) === 1;
        }

        $mState = 0; // cached expected number of octets after the current octet
        // until the beginning of the next UTF8 character sequence
        $mUcs4 = 0; // cached Unicode character
        $mBytes = 1; // cached expected number of octets in the current sequence

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $len = \strlen($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $len; ++$i) {
            $in = self::$ORD[$str[$i]];

            if ($mState === 0) {
                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if ((0x80 & $in) === 0) {
                    // US-ASCII, pass straight through.
                    $mBytes = 1;
                } elseif ((0xE0 & $in) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $mUcs4 = ($in & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;
                } elseif ((0xF0 & $in) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $mUcs4 = ($in & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;
                } elseif ((0xF8 & $in) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $mUcs4 = ($in & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;
                } elseif ((0xFC & $in) === 0xF8) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = ($in & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;
                } elseif ((0xFE & $in) === 0xFC) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = ($in & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;
                } else {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }
            } elseif ((0xC0 & $in) === 0x80) {
                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence

                // Legal continuation: merge its six payload bits into the code point.
                $mUcs4 |= ($in & 0x0000003F) << (($mState - 1) * 6);

                // End of the multi-octet sequence. mUcs4 now contains the final
                // Unicode code point to be output.
                if (--$mState === 0) {
                    // Check for illegal sequences and code points.
                    //
                    // From Unicode 3.1, non-shortest form is illegal
                    if (
                        ($mBytes === 2 && $mUcs4 < 0x0080)
                        ||
                        ($mBytes === 3 && $mUcs4 < 0x0800)
                        ||
                        ($mBytes === 4 && $mUcs4 < 0x10000)
                        ||
                        ($mBytes > 4)
                        ||
                        // From Unicode 3.2, surrogate characters are illegal.
                        (($mUcs4 & 0xFFFFF800) === 0xD800)
                        ||
                        // Code points outside the Unicode range are illegal.
                        ($mUcs4 > 0x10FFFF)
                    ) {
                        return false;
                    }

                    // initialize UTF8 cache
                    $mState = 0;
                    $mUcs4 = 0;
                    $mBytes = 1;
                }
            } else {
                // ((0xC0 & (*in) != 0x80) && (mState != 0))
                // Incomplete multi-octet sequence.
                return false;
            }
        }

        // A non-zero state means the input ended in the middle of a multi-octet
        // sequence (e.g. a lone lead byte at the end), which is not valid UTF-8.
        return $mState === 0;
    }
12527
12528
    /**
     * Applies the case-folding replacement tables to a string.
     *
     * The common table (self::$COMMON_CASE_FOLD) is always applied; the full table
     * (loaded once from the "caseFolding_full" data file) only on request.
     *
     * @param string $str
     * @param bool   $use_lowercase      <p>Use uppercase by default, otherwise use lowercase.</p>
     * @param bool   $use_full_case_fold <p>Convert not only common cases.</p>
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function fixStrCaseHelper(
        string $str,
        $use_lowercase = false,
        $use_full_case_fold = false
    ) {
        $common_upper = self::$COMMON_CASE_FOLD['upper'];
        $common_lower = self::$COMMON_CASE_FOLD['lower'];

        // only a strict boolean true selects the lowercase direction
        $to_lower = $use_lowercase === true;

        $str = $to_lower
            ? \str_replace($common_upper, $common_lower, $str)
            : \str_replace($common_lower, $common_upper, $str);

        if (!$use_full_case_fold) {
            return $str;
        }

        // loaded lazily and kept for the lifetime of the request
        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        return $to_lower
            ? \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str)
            : \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
    }
12574
12575
    /**
     * Includes "/data/<file>.php" (relative to this file's directory) and hands
     * back whatever that file returns.
     *
     * @param string $file <p>The file name without directory and without the ".php" extension.</p>
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function getData(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
12591
12592
    /**
     * Builds the emoji lookup caches on first use.
     *
     * Loads the "emoji" data file if necessary, sorts it so that the longest keys
     * come first, and fills the key / value caches plus one reversible placeholder
     * per key.
     *
     * @return true|null
     *                   true if the caches were built by this call, null if they already existed
     */
    private static function initEmojiData()
    {
        if (self::$EMOJI_KEYS_CACHE !== null) {
            // already initialized
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        // longest keys first
        \uksort(
            self::$EMOJI,
            static function (string $left, string $right): int {
                return \strlen($right) <=> \strlen($left);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = \array_values(self::$EMOJI);

        foreach (self::$EMOJI_KEYS_CACHE as $emoji_key) {
            // placeholder: the checksum followed by the same checksum reversed
            $checksum = (string) \crc32($emoji_key);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev($checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
12622
12623
    /**
     * Tells whether the mbstring function overloading of the string functions
     * is switched on.
     *
     * @return bool
     *              false if the constant MB_OVERLOAD_STRING is not defined or its bit is
     *              not set in the INI value "mbstring.func_overload", true otherwise
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function mbstring_overloaded()
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */

        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $func_overload = (int) @\ini_get('mbstring.func_overload');

        return ($func_overload & \MB_OVERLOAD_STRING) !== 0;
    }
12642
12643
    /**
     * Filters a list of strings and re-indexes the survivors.
     *
     * @param array    $strings             <p>The strings to filter.</p>
     * @param bool     $remove_empty_values <p>Drop strings that are empty after trim().</p>
     * @param int|null $remove_short_values <p>
     *                                      Drop strings whose mb_strlen() is lower than or equal to
     *                                      this value; null disables the check.
     *                                      </p>
     *
     * @return array
     *               <p>The remaining strings, in their original order, with consecutive keys.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function reduce_string_array(
        array $strings,
        bool $remove_empty_values,
        int $remove_short_values = null
    ) {
        // init
        $return = [];

        // iterate by value: nothing is written back, so no reference is needed
        foreach ($strings as $str) {
            if (
                $remove_short_values !== null
                &&
                \mb_strlen($str) <= $remove_short_values
            ) {
                continue;
            }

            if (
                $remove_empty_values === true
                &&
                \trim($str) === ''
            ) {
                continue;
            }

            $return[] = $str;
        }

        return $return;
    }
12682
12683
    /**
     * Builds a regex fragment that matches any of the given characters.
     *
     * Every character of $s (split via self::str_split()) is merged into one
     * bracket expression that starts out as $class: "-" is moved to the front,
     * characters of fewer than three bytes are added through preg_quote(), longer
     * ones are added as they are when self::strlen() reports a length of 1.
     * Anything else becomes its own alternative of a "(?:...|...)" group.
     * Results are memoized per request.
     *
     * @param string $s     <p>The characters to match.</p>
     * @param string $class <p>Initial content of the bracket expression (without the brackets).</p>
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function rxClass(string $s, string $class = '')
    {
        static $RX_CLASS_CACHE = [];

        // NOTE(review): the key is a plain concatenation, so different ($s, $class)
        // pairs with the same concatenation share one cache entry - confirm this is intended.
        $cache_key = $s . $class;

        if (isset($RX_CLASS_CACHE[$cache_key])) {
            return $RX_CLASS_CACHE[$cache_key];
        }

        $class_array = [$class];

        // iterate by value into a separate variable: the result of str_split() is a
        // temporary, and the parameter $s must not be overwritten while looping
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                $class_array[0] = '-' . $class_array[0];
            } elseif (!isset($char[2])) {
                $class_array[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                $class_array[0] .= $char;
            } else {
                $class_array[] = $char;
            }
        }

        if ($class_array[0]) {
            $class_array[0] = '[' . $class_array[0] . ']';
        }

        if (\count($class_array) === 1) {
            $return = $class_array[0];
        } else {
            $return = '(?:' . \implode('|', $class_array) . ')';
        }

        $RX_CLASS_CACHE[$cache_key] = $return;

        return $return;
    }
12733
12734
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * Splits $names on $delimiter and upper-cases the first character of every part via self::ucfirst(),
     * except for parts that
     * - are exactly (case-sensitive) one of the known lowercase particles ("de", "van", "von", ...),
     * - start with one of the known prefixes ("al-", "d'", "mc", ...), or
     * - start with one of the particles while the delimiter is "-".
     *
     * @param string $names     <p>The name(s) to process.</p>
     * @param string $delimiter <p>The separator between the name parts; an empty delimiter yields "".</p>
     * @param string $encoding  <p>Encoding passed on to self::strpos().</p>
     *
     * @return string <p>The parts joined with $delimiter again.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function str_capitalize_name_helper(
        string $names,
        string $delimiter,
        string $encoding = 'UTF-8'
    ) {
        // \explode() cannot split on an empty delimiter (warning + false on PHP 7, ValueError on PHP 8),
        // so keep the "nothing to do" result explicit and identical on every version
        if ($delimiter === '') {
            return '';
        }

        $special_cases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // beginnings that keep a part untouched; the particles only count as beginnings for "-"
        $skip_beginnings = $special_cases['prefixes'];
        if ($delimiter === '-') {
            $skip_beginnings = \array_merge($special_cases['names'], $skip_beginnings);
        }

        $name_helper_array = \explode($delimiter, $names);

        // write back by key instead of by reference, so no dangling reference is left behind
        foreach ($name_helper_array as $index => $name) {
            if (\in_array($name, $special_cases['names'], true)) {
                continue;
            }

            foreach ($skip_beginnings as $beginning) {
                if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                    // one match is enough, go on with the next name part
                    continue 2;
                }
            }

            $name_helper_array[$index] = self::ucfirst($name);
        }

        return \implode($delimiter, $name_helper_array);
    }
12828
12829
    /**
     * Generic case-sensitive transformation for collation matching.
     *
     * The string is decomposed (Unicode normalization form D) and every run of
     * non-spacing marks (\p{Mn}) is removed, e.g. "é" becomes "e".
     *
     * @param string $str <p>The input string</p>
     *
     * @return string|null <p>The folded string, "" if the input could not be normalized,
     *                     or null if the regex replacement failed.</p>
     */
    private static function strtonatfold(string $str)
    {
        /** @noinspection PhpUndefinedClassInspection */
        $str = \Normalizer::normalize($str, \Normalizer::NFD);

        // normalize() reports failure (e.g. invalid UTF-8) with false, which must not reach preg_replace()
        if ($str === false) {
            return '';
        }

        return \preg_replace('/\p{Mn}+/u', '', $str);
    }
12845
12846
    /**
     * Convert one input byte into its UTF-8 representation.
     *
     * The byte is first looked up in the Windows-1252 table; if there is no entry,
     * a two-byte sequence is built with string-wise bit operations:
     * lead byte = CHR[ord >> 6] | 0xC0, continuation byte = (byte & 0x3F) | 0x80.
     *
     * The lookup tables (self::$ORD, self::$CHR, self::$WIN1252_TO_UTF8) are loaded
     * lazily via self::getData() on first use.
     *
     * NOTE(review): presumably self::$ORD maps a byte to its integer code and self::$CHR
     * maps an integer back to a one-byte string - confirm against the data files.
     *
     * @param int|string $input <p>Key into self::$ORD; presumably a single byte.</p>
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function to_utf8_convert_helper($input)
    {
        // init
        $buf = '';

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // ">> 6" instead of "/ 64": the division produces a float, and using a fractional
            // float as array offset is deprecated since PHP 8.1 (the selected offset is the same)
            $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
            // the operands are strings on purpose: "&" and "|" work byte-wise here
            $cc2 = ((string) $input & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
12882
12883
    /**
     * Rewrite non-standard "%uXXXX" escapes (three or four hex digits) into
     * hexadecimal HTML entities of the form "&#xXXXX;".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string <p>The input with every "%uXXXX" replaced; unchanged if none is found.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function urldecode_unicode_helper(string $str)
    {
        $unicode_escape_rx = '/%u([0-9a-fA-F]{3,4})/';

        // nothing to convert (or the match failed): hand the input back untouched
        if (!\preg_match($unicode_escape_rx, $str)) {
            return $str;
        }

        return (string) \preg_replace($unicode_escape_rx, '&#x\\1;', $str);
    }
12899
}
12900