Passed
Push — master ( e70be3...4b4b13 )
by Lars
03:40
created

UTF8   F

Complexity

Total Complexity 1696

Size/Duplication

Total Lines 12866
Duplicated Lines 0 %

Test Coverage

Coverage 79.54%

Importance

Changes 97
Bugs 51 Features 6
Metric Value
eloc 4351
c 97
b 51
f 6
dl 0
loc 12866
ccs 3052
cts 3837
cp 0.7954
rs 0.8
wmc 1696

298 Methods

Rating   Name   Duplication   Size   Complexity  
A ctype_loaded() 0 3 1
A decimal_to_chr() 0 3 1
A hasBom() 0 3 1
A isBinary() 0 3 1
A html_escape() 0 6 1
A isHtml() 0 3 1
A isBase64() 0 3 1
A isUtf32() 0 3 1
A is_alpha() 0 8 2
A isUtf8() 0 3 1
A htmlspecialchars() 0 15 3
A intlChar_loaded() 0 3 1
A intl_loaded() 0 3 1
A html_stripe_empty_tags() 0 6 1
A isBom() 0 3 1
A int_to_chr() 0 3 1
A iconv_loaded() 0 3 1
A isAscii() 0 3 1
A isUtf16() 0 3 1
A is_alphanumeric() 0 8 2
A int_to_hex() 0 7 2
A htmlentities() 0 28 3
A isJson() 0 3 1
B is_json() 0 29 8
A add_bom_to_string() 0 7 2
A array_change_key_case() 0 23 5
A chr_to_int() 0 3 1
A __construct() 0 2 1
B between() 0 48 8
A char_at() 0 7 2
A chars() 0 3 1
A access() 0 11 4
A callback() 0 3 1
A binary_to_str() 0 12 3
A bom() 0 3 1
B chr_to_decimal() 0 38 8
D chr() 0 101 18
A chunk_split() 0 3 1
A chr_map() 0 5 1
A chr_size_list() 0 17 3
A checkForSupport() 0 47 4
A chr_to_hex() 0 11 3
A str_substr_after_first_separator() 0 28 6
A file_has_bom() 0 8 2
A str_begins() 0 3 1
A max() 0 14 3
B str_camelize() 0 70 10
A parse_str() 0 16 4
A filter_input() 0 13 3
A str_contains() 0 10 2
B str_to_lines() 0 29 8
A substr_in_byte() 0 18 6
A get_unique_string() 0 15 2
A is_bom() 0 10 3
A is_hexadecimal() 0 8 2
A strnatcasecmp() 0 5 1
A encode_mimeheader() 0 25 5
A substr_left() 0 15 4
A count_chars() 0 11 1
D strlen() 0 99 19
A str_isubstr_last() 0 25 4
A str_replace_beginning() 0 24 6
A has_uppercase() 0 8 2
A remove_left() 0 24 4
B stripos() 0 59 11
A str_offset_exists() 0 10 2
D strrchr() 0 101 20
A to_filename() 0 9 1
A str_iends_with() 0 11 3
A max_chr_width() 0 8 2
C utf8_decode() 0 61 13
A ltrim() 0 27 5
A emoji_decode() 0 18 2
A is_utf8() 0 13 4
A remove_html() 0 3 1
B str_longest_common_suffix() 0 54 10
C wordwrap() 0 68 14
B ucfirst() 0 57 7
A lcword() 0 13 1
A str_pad_both() 0 12 1
A str_index_last() 0 11 1
A str_substr_last() 0 33 6
A mbstring_loaded() 0 3 1
A str_limit() 0 26 6
A toUTF8() 0 3 1
A string() 0 12 3
D normalize_encoding() 0 142 16
B rxClass() 0 39 8
B get_file_type() 0 61 7
A str_ensure_right() 0 13 4
B str_titleize_for_humans() 0 160 7
C is_utf16() 0 68 16
C filter() 0 59 13
A normalize_whitespace() 0 9 1
A str_starts_with() 0 11 3
A str_humanize() 0 15 1
A is_html() 0 14 2
A decode_mimeheader() 0 15 5
C substr_count_in_byte() 0 55 15
A html_decode() 0 6 1
A strchr() 0 13 1
A strichr() 0 13 1
A str_index_first() 0 11 1
A strlen_in_byte() 0 12 3
A str_ireplace_ending() 0 21 6
A rtrim() 0 27 5
C str_longest_common_substring() 0 76 16
A regex_replace() 0 20 3
A titlecase() 0 31 5
A getData() 0 6 1
A str_iindex_first() 0 11 1
B strtolower() 0 54 10
B urldecode() 0 51 8
A str_isubstr_before_first_separator() 0 19 5
B strrev() 0 43 10
A replace_all() 0 11 2
D substr_replace() 0 124 27
A removeBOM() 0 3 1
A strstr_in_byte() 0 15 4
A emoji_encode() 0 18 2
A str_matches_pattern() 0 3 1
C str_titleize() 0 69 12
A ws() 0 3 1
B get_random_string() 0 56 10
A str_replace_first() 0 20 2
A fix_utf8() 0 30 4
A toLatin1() 0 3 1
A str_pad_right() 0 12 1
B ucwords() 0 51 9
A first_char() 0 14 4
A to_boolean() 0 35 5
C stristr() 0 68 15
A strncasecmp() 0 10 1
B strwidth() 0 43 8
A str_iends() 0 3 1
A css_stripe_media_queries() 0 6 1
A trim() 0 27 5
A clean() 0 48 6
A is_serialized() 0 11 3
A str_upper_camelize() 0 8 1
A is_uppercase() 0 8 2
A substr_compare() 0 33 6
C substr_count() 0 62 16
A strnatcmp() 0 9 2
D str_pad() 0 146 16
A urldecode_unicode_helper() 0 8 2
A str_ireplace() 0 18 3
A to_latin1() 0 3 1
A str_replace_ending() 0 24 6
A string_has_bom() 0 10 3
B strtr() 0 34 8
A str_contains_all() 0 23 6
A is_ascii() 0 3 1
A normalize_line_ending() 0 3 1
A str_isubstr_after_last_separator() 0 26 5
D range() 0 71 23
B strspn() 0 30 10
A strcasecmp() 0 21 1
A str_transliterate() 0 6 1
B rawurldecode() 0 51 8
A str_ends() 0 3 1
B str_capitalize_name_helper() 0 82 10
A utf8_encode() 0 16 3
A normalize_msword() 0 3 1
C str_detect_encoding() 0 111 13
A spaces_to_tabs() 0 11 3
A str_istarts_with() 0 11 3
A is_blank() 0 8 2
A str_replace() 0 14 1
A substr_iright() 0 15 4
D getCharDirection() 0 105 118
A replace() 0 11 2
A filter_var_array() 0 12 2
A to_iso8859() 0 16 4
A words_limit() 0 20 5
A strip_tags() 0 18 4
A pcre_utf8_support() 0 4 1
A str_isubstr_before_last_separator() 0 24 6
D str_truncate_safe() 0 78 18
A codepoints() 0 36 5
A substr_right() 0 31 6
A lowerCaseFirst() 0 13 1
D str_split() 0 129 29
A str_ends_with_any() 0 13 4
A strrpos_in_byte() 0 12 4
A cleanup() 0 25 2
F strrpos() 0 119 25
A remove_right() 0 25 4
A remove_html_breaks() 0 3 1
A showSupport() 0 8 2
A remove_invisible_characters() 0 9 1
A single_chr_html_encode() 0 18 4
A str_replace_last() 0 19 2
A str_iindex_last() 0 11 1
A str_substr_before_last_separator() 0 31 6
B is_binary() 0 35 9
B strtocasefold() 0 33 7
A lcfirst() 0 44 5
A tabs_to_spaces() 0 11 3
A finfo_loaded() 0 3 1
B str_truncate() 0 44 7
D strripos() 0 96 19
A strpos_in_byte() 0 12 4
A str_ends_with() 0 11 3
A fits_inside() 0 3 1
A to_ascii() 0 6 1
A is_binary_file() 0 16 3
A reduce_string_array() 0 29 6
B str_longest_common_prefix() 0 51 8
A mbstring_overloaded() 0 11 2
A str_pad_left() 0 12 1
A str_substr_first() 0 33 6
A remove_bom() 0 22 5
A str_repeat() 0 5 1
A strpbrk() 0 11 4
A whitespace_table() 0 3 1
A substr_count_simple() 0 31 6
A str_iends_with_any() 0 13 4
A str_substr_after_last_separator() 0 28 6
A str_isubstr_after_first_separator() 0 26 5
F extract_text() 0 175 34
A json_loaded() 0 3 1
B str_snakeize() 0 55 6
A is_lowercase() 0 8 2
A str_sort() 0 15 3
D to_utf8() 0 117 35
A ucword() 0 6 1
A str_underscored() 0 3 1
A str_offset_get() 0 14 4
A strip_whitespace() 0 7 2
A toAscii() 0 6 1
A str_ibegins() 0 3 1
A str_capitalize_name() 0 8 1
B str_limit_after_word() 0 55 11
A lcwords() 0 34 6
A str_upper_first() 0 13 1
A normalizeEncoding() 0 3 1
A swapCase() 0 17 4
A filter_var() 0 12 2
A substr_ileft() 0 15 4
A is_empty() 0 3 1
B html_encode() 0 53 11
A str_dasherize() 0 3 1
A str_ensure_left() 0 11 3
F encode() 0 140 37
B urldecode_fix_win1252_chars() 0 227 1
A toIso8859() 0 3 1
C is_utf32() 0 68 16
C ord() 0 72 16
A strtonatfold() 0 7 1
A json_decode() 0 14 2
A fix_simple_utf8() 0 19 4
C strcspn() 0 52 12
A fixStrCaseHelper() 0 36 5
B str_split_pattern() 0 49 11
D strstr() 0 92 18
A has_lowercase() 0 8 2
A json_encode() 0 10 2
A str_isubstr_first() 0 25 4
A is_base64() 0 20 5
A str_last_char() 0 16 4
A str_ireplace_beginning() 0 21 6
A hex_to_int() 0 14 3
A hex_to_chr() 0 3 1
A str_substr_before_first_separator() 0 32 6
F substr() 0 143 32
A wordwrap_per_line() 0 28 5
A str_surround() 0 3 1
A strncmp() 0 19 4
A filter_input_array() 0 12 3
A str_insert() 0 28 4
A getSupportInfo() 0 13 3
A utf8_fix_win1252_chars() 0 3 1
A replace_diamond_question_mark() 0 38 5
D is_utf8_string() 0 134 28
A to_utf8_convert_helper() 0 28 5
B str_delimit() 0 33 8
B strtoupper() 0 54 10
A min() 0 14 3
A collapse_whitespace() 0 8 2
C html_entity_decode() 0 55 13
A str_starts_with_any() 0 17 5
B strrichr() 0 54 11
A split() 0 6 1
A str_istarts_with_any() 0 17 5
B str_contains_any() 0 29 8
A initEmojiData() 0 26 4
A remove_duplicates() 0 16 4
B str_slice() 0 33 10
F strpos() 0 131 27
A str_shuffle() 0 35 6
A strcmp() 0 9 2
B file_get_contents() 0 56 11
A str_word_count() 0 23 5
A strripos_in_byte() 0 12 4
A str_to_binary() 0 10 2
A symfony_polyfill_used() 0 16 5
B str_to_words() 0 36 8

How to fix   Complexity   

Complex Class

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
final class UTF8
8
{
9
    /**
10
     * (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
11
     * This regular expression is a work around for http://bugs.exim.org/1279
12
     */
13
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
14
15
    /**
16
     * Bom => Byte-Length
17
     *
18
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
19
     *
20
     * @var array
21
     */
22
    private static $BOM = [
23
        "\xef\xbb\xbf"     => 3, // UTF-8 BOM
24
        ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
25
        "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
26
        '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
27
        "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
28
        'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
29
        "\xfe\xff"         => 2, // UTF-16 (BE) BOM
30
        'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
31
        "\xff\xfe"         => 2, // UTF-16 (LE) BOM
32
        'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
33
    ];
34
35
    /**
36
     * Numeric code point => UTF-8 Character
37
     *
38
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
39
     *
40
     * @var array
41
     */
42
    private static $WHITESPACE = [
43
        // NUL Byte
44
        0 => "\x0",
45
        // Tab
46
        9 => "\x9",
47
        // New Line
48
        10 => "\xa",
49
        // Vertical Tab
50
        11 => "\xb",
51
        // Carriage Return
52
        13 => "\xd",
53
        // Ordinary Space
54
        32 => "\x20",
55
        // NO-BREAK SPACE
56
        160 => "\xc2\xa0",
57
        // OGHAM SPACE MARK
58
        5760 => "\xe1\x9a\x80",
59
        // MONGOLIAN VOWEL SEPARATOR
60
        6158 => "\xe1\xa0\x8e",
61
        // EN QUAD
62
        8192 => "\xe2\x80\x80",
63
        // EM QUAD
64
        8193 => "\xe2\x80\x81",
65
        // EN SPACE
66
        8194 => "\xe2\x80\x82",
67
        // EM SPACE
68
        8195 => "\xe2\x80\x83",
69
        // THREE-PER-EM SPACE
70
        8196 => "\xe2\x80\x84",
71
        // FOUR-PER-EM SPACE
72
        8197 => "\xe2\x80\x85",
73
        // SIX-PER-EM SPACE
74
        8198 => "\xe2\x80\x86",
75
        // FIGURE SPACE
76
        8199 => "\xe2\x80\x87",
77
        // PUNCTUATION SPACE
78
        8200 => "\xe2\x80\x88",
79
        // THIN SPACE
80
        8201 => "\xe2\x80\x89",
81
        //HAIR SPACE
82
        8202 => "\xe2\x80\x8a",
83
        // LINE SEPARATOR
84
        8232 => "\xe2\x80\xa8",
85
        // PARAGRAPH SEPARATOR
86
        8233 => "\xe2\x80\xa9",
87
        // NARROW NO-BREAK SPACE
88
        8239 => "\xe2\x80\xaf",
89
        // MEDIUM MATHEMATICAL SPACE
90
        8287 => "\xe2\x81\x9f",
91
        // HALFWIDTH HANGUL FILLER
92
        65440 => "\xef\xbe\xa0",
93
        // IDEOGRAPHIC SPACE
94
        12288 => "\xe3\x80\x80",
95
    ];
96
97
    /**
98
     * @var array
99
     */
100
    private static $WHITESPACE_TABLE = [
101
        'SPACE'                     => "\x20",
102
        'NO-BREAK SPACE'            => "\xc2\xa0",
103
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
104
        'EN QUAD'                   => "\xe2\x80\x80",
105
        'EM QUAD'                   => "\xe2\x80\x81",
106
        'EN SPACE'                  => "\xe2\x80\x82",
107
        'EM SPACE'                  => "\xe2\x80\x83",
108
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
109
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
110
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
111
        'FIGURE SPACE'              => "\xe2\x80\x87",
112
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
113
        'THIN SPACE'                => "\xe2\x80\x89",
114
        'HAIR SPACE'                => "\xe2\x80\x8a",
115
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
116
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
117
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
118
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
119
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
120
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
121
        'HALFWIDTH HANGUL FILLER'   => "\xef\xbe\xa0",
122
    ];
123
124
    /**
125
     * @var array{upper: string[], lower: string[]}
126
     */
127
    private static $COMMON_CASE_FOLD = [
128
        'upper' => [
129
            'µ',
130
            'ſ',
131
            "\xCD\x85",
132
            'ς',
133
            'ẞ',
134
            "\xCF\x90",
135
            "\xCF\x91",
136
            "\xCF\x95",
137
            "\xCF\x96",
138
            "\xCF\xB0",
139
            "\xCF\xB1",
140
            "\xCF\xB5",
141
            "\xE1\xBA\x9B",
142
            "\xE1\xBE\xBE",
143
        ],
144
        'lower' => [
145
            'μ',
146
            's',
147
            'ι',
148
            'σ',
149
            'ß',
150
            'β',
151
            'θ',
152
            'φ',
153
            'π',
154
            'κ',
155
            'ρ',
156
            'ε',
157
            "\xE1\xB9\xA1",
158
            'ι',
159
        ],
160
    ];
161
162
    /**
163
     * @var array
164
     */
165
    private static $SUPPORT = [];
166
167
    /**
168
     * @var array|null
169
     */
170
    private static $BROKEN_UTF8_FIX;
171
172
    /**
173
     * @var array|null
174
     */
175
    private static $WIN1252_TO_UTF8;
176
177
    /**
178
     * @var array|null
179
     */
180
    private static $INTL_TRANSLITERATOR_LIST;
181
182
    /**
183
     * @var array|null
184
     */
185
    private static $ENCODINGS;
186
187
    /**
188
     * @var array|null
189
     */
190
    private static $ORD;
191
192
    /**
193
     * @var array|null
194
     */
195
    private static $EMOJI;
196
197
    /**
198
     * @var array|null
199
     */
200
    private static $EMOJI_VALUES_CACHE;
201
202
    /**
203
     * @var array|null
204
     */
205
    private static $EMOJI_KEYS_CACHE;
206
207
    /**
208
     * @var array|null
209
     */
210
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
211
212
    /**
213
     * @var array|null
214
     */
215
    private static $CHR;
216
217
    /**
     * Creates an instance; the constructor body is intentionally empty.
     */
    public function __construct()
    {
        // nothing to initialise
    }
223
224
    /**
     * Fetch a single character by position, similar to $str[1] but multi-byte aware.
     *
     * An empty input string or a negative position yields an empty string.
     *
     * @param string $str      <p>A UTF-8 string.</p>
     * @param int    $pos      <p>The position of character to return.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string single multi-byte character
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        $nothing_to_read = $pos < 0 || $str === '';
        if ($nothing_to_read) {
            return '';
        }

        // fast path: plain "mb_substr" for UTF-8, otherwise the encoding-aware wrapper
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $pos, 1)
            : self::substr($str, $pos, 1, $encoding);

        return (string) $char;
    }
245
246
    /**
     * Make sure the string starts with the UTF-8 BOM.
     *
     * INFO: A string that already has a BOM is returned unchanged.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the output string that contains BOM
     */
    public static function add_bom_to_string(string $str): string
    {
        if (self::string_has_bom($str) !== false) {
            return $str;
        }

        return self::bom() . $str;
    }
263
264
    /**
     * Changes the case of all keys in an array (multi-byte aware).
     *
     * Only the keys are converted; the values are copied over unchanged.
     *
     * @param array  $array    <p>The array to work on</p>
     * @param int    $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                         or <strong>CASE_LOWER</strong> (default)</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return array
     *               <p>An array with its keys lower- or uppercased.</p>
     */
    public static function array_change_key_case(
        array $array,
        int $case = \CASE_LOWER,
        string $encoding = 'UTF-8'
    ): array {
        // any unknown flag silently falls back to lower-casing
        if (
            $case !== \CASE_LOWER
            &&
            $case !== \CASE_UPPER
        ) {
            $case = \CASE_LOWER;
        }

        $return = [];
        // iterate by value: a by-reference loop would only leave a dangling reference behind
        foreach ($array as $key => $value) {
            $key = $case === \CASE_LOWER
                ? self::strtolower((string) $key, $encoding)
                : self::strtoupper((string) $key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
299
300
    /**
     * Extracts the text located between the $start and $end delimiters.
     *
     * An empty string is returned when either delimiter is not found or when
     * nothing lies between them. The search for $start begins at $offset.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        // decided before normalisation: only a literal 'UTF-8' takes the direct "mb_" path
        $use_mb_directly = $encoding === 'UTF-8';
        if (!$use_mb_directly) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $delimiter_at = $use_mb_directly
            ? \mb_strpos($str, $start, $offset)
            : self::strpos($str, $start, $offset, $encoding);
        if ($delimiter_at === false) {
            return '';
        }

        $delimiter_length = $use_mb_directly
            ? \mb_strlen($start)
            : self::strlen($start, $encoding);
        $content_from = $delimiter_at + (int) $delimiter_length;

        $content_to = $use_mb_directly
            ? \mb_strpos($str, $end, $content_from)
            : self::strpos($str, $end, $content_from, $encoding);
        if ($content_to === false || $content_to === $content_from) {
            return '';
        }

        $content_length = $content_to - $content_from;

        if ($use_mb_directly) {
            return (string) \mb_substr($str, $content_from, $content_length);
        }

        return (string) self::substr($str, $content_from, $content_length, $encoding);
    }
363
364
    /**
     * Convert a string of binary digits ("0" / "1") into the bytes it represents.
     *
     * The digits are converted to hexadecimal and packed into bytes. Leading
     * zero bits are not preserved as separate bytes, and an input whose value
     * is zero yields an empty string.
     *
     * NOTE: the conversion relies on "base_convert()", which the PHP manual
     * documents as possibly losing precision on large numbers.
     *
     * @param mixed $bin 1|0
     *
     * @return string
     */
    public static function binary_to_str($bin): string
    {
        if (!isset($bin[0])) {
            return '';
        }

        $hex = \base_convert($bin, 2, 16);
        if ($hex === '0') {
            return '';
        }

        // "base_convert" drops leading zeros; "pack('H*')" would pad an odd-length
        // hex string on the right (e.g. "a" => 0xA0), so restore the zero nibble on the left
        if (\strlen($hex) % 2 !== 0) {
            $hex = '0' . $hex;
        }

        return \pack('H*', $hex);
    }
384
385
    /**
     * Provides the Byte Order Mark for UTF-8 (the three bytes EF BB BF).
     *
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
     *
     * @return string UTF-8 Byte Order Mark
     */
    public static function bom(): string
    {
        $utf8_bom = "\xef\xbb\xbf";

        return $utf8_bom;
    }
396
397
    /**
     * Alias of UTF8::chr_map(): both arguments are forwarded unchanged.
     *
     * @param callable $callback
     * @param string   $str
     *
     * @return string[]
     *
     * @see UTF8::chr_map()
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
411
412
    /**
     * Get the single character found at $index (zero-based).
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @return string the character at $index
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // UTF-8 goes straight to "mb_substr"; everything else uses the encoding-aware wrapper
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $index, 1)
            : self::substr($str, $index, 1, $encoding);

        return (string) $char;
    }
429
430
    /**
     * Break the string up into its characters by delegating to UTF8::str_split().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string[] an array of chars
     */
    public static function chars(string $str): array
    {
        $characters = self::str_split($str);

        return $characters;
    }
441
442
    /**
     * Probes the server environment once and records the results in self::$SUPPORT.
     *
     * The first call runs every probe and returns true; each later call finds
     * the "already checked" marker and returns null without doing anything.
     *
     * @return true|null
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        // set the marker first, so the probes below run only once
        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        // when the polyfill is in use, set the "mb_" internal encoding here as well
        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
497
498
    /**
     * Builds the character that belongs to a code point.
     *
     * Lookup order: the per-call-site cache, the "chr" data table for code
     * points up to 127, "IntlChar" when available, and finally a manual
     * UTF-8 byte composition. For any encoding other than "UTF-8" the result
     * is passed through UTF8::encode() before it is cached and returned.
     *
     * INFO: opposite to UTF8::ord()
     *
     * @param int|string $code_point <p>The code point for which to generate a character.</p>
     * @param string     $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @return string|null multi-byte character, returns null on failure or empty input
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // memo of generated characters, keyed by code point + encoding
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $is_basic_encoding = $encoding === 'UTF-8'
                             || $encoding === 'ISO-8859-1'
                             || $encoding === 'WINDOWS-1252';
        if (!$is_basic_encoding && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        $cache_key = $code_point . $encoding;
        if (isset($CHAR_CACHE[$cache_key]) === true) {
            return $CHAR_CACHE[$cache_key];
        }

        if ($code_point <= 127) {
            // use "simple"-char only until "\x80"
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];
        } elseif (self::$SUPPORT['intlChar'] === true) {
            // fallback via "IntlChar"
            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);
        } else {
            // fallback via vanilla php: compose the UTF-8 bytes from the lookup table
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            $code_point = (int) $code_point;
            $byte_table = self::$CHR;
            $last_byte = ($code_point & 0x3F) + 0x80;

            if ($code_point <= 0x7F) {
                $chr = $byte_table[$code_point];
            } elseif ($code_point <= 0x7FF) {
                $chr = $byte_table[($code_point >> 6) + 0xC0] .
                       $byte_table[$last_byte];
            } elseif ($code_point <= 0xFFFF) {
                $chr = $byte_table[($code_point >> 12) + 0xE0] .
                       $byte_table[(($code_point >> 6) & 0x3F) + 0x80] .
                       $byte_table[$last_byte];
            } else {
                $chr = $byte_table[($code_point >> 18) + 0xF0] .
                       $byte_table[(($code_point >> 12) & 0x3F) + 0x80] .
                       $byte_table[(($code_point >> 6) & 0x3F) + 0x80] .
                       $byte_table[$last_byte];
            }
        }

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cache_key] = $chr;
    }
610
611
    /**
     * Runs the callback on every character of the string.
     *
     * The string is split via UTF8::str_split() and the pieces are handed to
     * "array_map()".
     *
     * @param callable $callback <p>The callback function.</p>
     * @param string   $str      <p>UTF-8 string to run callback on.</p>
     *
     * @return string[]
     *                  <p>The outcome of the callback, as array.</p>
     */
    public static function chr_map($callback, string $str): array
    {
        $characters = self::str_split($str);

        return \array_map($callback, $characters);
    }
627
628
    /**
     * Lists the byte length of every character in a Unicode string.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @return int[] an array of byte lengths of each character
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            $byte_counter = static function (string $data): int {
                return \mb_strlen($data, 'CP850'); // 8-BIT
            };
        } else {
            $byte_counter = '\strlen';
        }

        return \array_map($byte_counter, self::str_split($str));
    }
658
659
    /**
     * Determine the decimal code point of a character.
     *
     * "iconv" is used when it is available and the conversion succeeds;
     * otherwise the code point is decoded by hand from the UTF-8 bytes.
     *
     * @param string $char <p>The input character.</p>
     *
     * @return int
     */
    public static function chr_to_decimal(string $char): int
    {
        if (self::$SUPPORT['iconv'] === true) {
            $ucs4 = \iconv('UTF-8', 'UCS-4LE', $char);
            if ($ucs4 !== false) {
                /** @noinspection OffsetOperationsInspection */
                return \unpack('V', $ucs4)[1];
            }
        }

        $code = self::ord($char[0]);

        // 0xxxxxxx: a single-byte character is its own code point
        if (!($code & 0x80)) {
            return $code;
        }

        // the lead byte gives the sequence length; strip its marker bits
        $sequence_length = 1;
        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $sequence_length = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $sequence_length = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $sequence_length = 4;
            $code &= ~0xf0;
        }

        // 10xxxxxx: append the payload bits of every continuation byte
        for ($offset = 1; $offset < $sequence_length; ++$offset) {
            $code = ($code << 6) + (self::ord($char[$offset]) & ~0x80);
        }

        return $code;
    }
705
706
    /**
707
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
708
     *
709
     * @param int|string $char   <p>The input character</p>
710
     * @param string     $prefix [optional]
711
     *
712
     * @return string The code point encoded as U+xxxx
713
     */
714 2
    public static function chr_to_hex($char, string $prefix = 'U+'): string
    {
        // No character, no code point.
        if ($char === '') {
            return '';
        }

        // The HTML entity for NUL is handed to ord() as an empty string.
        $normalized = $char === '&#0;' ? '' : (string) $char;

        $code_point = self::ord($normalized);

        return self::int_to_hex($code_point, $prefix);
    }
726
727
    /**
728
     * alias for "UTF8::chr_to_decimal()"
729
     *
730
     * @param string $chr
731
     *
732
     * @return int
733
     *
734
     * @see UTF8::chr_to_decimal()
735
     * @deprecated <p>please use "UTF8::chr_to_decimal()"</p>
736
     */
737 2
    public static function chr_to_int(string $chr): int
    {
        // Deprecated alias: the work is done by chr_to_decimal().
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
741
742
    /**
743
     * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
744
     *
745
     * @param string $body         <p>The original string to be split.</p>
746
     * @param int    $chunk_length [optional] <p>The maximum character length of a chunk.</p>
747
     * @param string $end          [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
748
     *
749
     * @return string the chunked string
750
     */
751 4
    public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
    {
        // Cut the text into pieces of at most $chunk_length characters ...
        $chunks = self::str_split($body, $chunk_length);

        // ... and glue them together with the line ending between the pieces.
        return \implode($end, $chunks);
    }
755
756
    /**
757
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
758
     *
759
     * @param string $str                                     <p>The string to be sanitized.</p>
760
     * @param bool   $remove_bom                              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
761
     * @param bool   $normalize_whitespace                    [optional] <p>Set to true, if you need to normalize the
762
     *                                                        whitespace.</p>
763
     * @param bool   $normalize_msword                        [optional] <p>Set to true, if you need to normalize MS Word chars
764
     *                                                        e.g.: "…"
765
     *                                                        => "..."</p>
766
     * @param bool   $keep_non_breaking_space                 [optional] <p>Set to true, to keep non-breaking-spaces, in
767
     *                                                        combination with
768
     *                                                        $normalize_whitespace</p>
769
     * @param bool   $replace_diamond_question_mark           [optional] <p>Set to true, if you need to remove diamond question
770
     *                                                        mark e.g.: "�"</p>
771
     * @param bool   $remove_invisible_characters             [optional] <p>Set to false, if you do not want to remove invisible
772
     *                                                        characters e.g.: "\0"</p>
773
     * @param bool   $remove_invisible_characters_url_encoded [optional] <p>Set to true, if you also want to remove invisible
774
     *                                                        url encoded characters e.g.: "%0B"<br>
775
     *                                                        WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
776
     *                                                        </p>
777
     *
778
     * @return string clean UTF-8 encoded string
779
     */
780 87
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true,
        bool $remove_invisible_characters_url_encoded = false
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // Group 1 captures runs of well-formed sequences, the other two groups
        // capture stray bytes; replacing with '$1' keeps only group 1.
        $utf8_pattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        /** @noinspection NotOptimalRegularExpressionsInspection */
        $cleaned = (string) \preg_replace($utf8_pattern, '$1', $str);

        // The optional steps run in a fixed order, each one on the result of
        // the previous one.
        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned, '');
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned, $remove_invisible_characters_url_encoded);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        if ($remove_bom) {
            $cleaned = self::remove_bom($cleaned);
        }

        return $cleaned;
    }
829
830
    /**
831
     * Clean-up a string and show only printable UTF-8 chars at the end  + fix UTF-8 encoding.
832
     *
833
     * @param string $str <p>The input string.</p>
834
     *
835
     * @return string
836
     */
837 33
    public static function cleanup($str): string
    {
        // The parameter is untyped, so force a string first.
        $input = (string) $str;
        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $fixed = self::fix_simple_utf8($input);

        // Flags for clean(), in its parameter order.
        $remove_bom = true;
        $normalize_whitespace = true;
        $normalize_msword = false;
        $keep_non_breaking_space = true;
        $replace_diamond_question_mark = true;
        $remove_invisible_characters = true;

        // Remove all non UTF-8 symbols, the diamond question mark, invisible
        // characters (e.g. "\0") and the BOM; normalize whitespace chars but
        // keep non-breaking-spaces.
        return self::clean(
            $fixed,
            $remove_bom,
            $normalize_whitespace,
            $normalize_msword,
            $keep_non_breaking_space,
            $replace_diamond_question_mark,
            $remove_invisible_characters
        );
    }
864
865
    /**
866
     * Accepts a string or a array of strings and returns an array of Unicode code points.
867
     *
868
     * INFO: opposite to UTF8::string()
869
     *
870
     * @param string|string[] $arg     <p>A UTF-8 encoded string or an array of such strings.</p>
871
     * @param bool            $u_style <p>If True, will return code points in U+xxxx format,
872
     *                                 default, code points will be returned as integers.</p>
873
     *
874
     * @return array<int|string>
875
     *                           The array of code points:<br>
876
     *                           array<int> for $u_style === false<br>
877
     *                           array<string> for $u_style === true<br>
878
     */
879 12
    public static function codepoints($arg, bool $u_style = false): array
    {
        // A single string is first split into its characters.
        $characters = \is_string($arg) ? self::str_split($arg) : $arg;

        /**
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_array($characters) || $characters === []) {
            // Neither a string nor an array, or nothing to convert.
            return [];
        }

        // Every entry becomes its integer code point ...
        $code_points = \array_map([self::class, 'ord'], $characters);

        if (!$u_style) {
            return $code_points;
        }

        // ... and, on request, its hexadecimal notation.
        return \array_map([self::class, 'int_to_hex'], $code_points);
    }
916
917
    /**
918
     * Trims the string and replaces consecutive whitespace characters with a
919
     * single space. This includes tabs and newline characters, as well as
920
     * multibyte whitespace such as the thin space and ideographic space.
921
     *
922
     * @param string $str <p>The input string.</p>
923
     *
924
     * @return string string with a trimmed $str and condensed whitespace
925
     */
926 13
    public static function collapse_whitespace(string $str): string
    {
        // Without mbstring the class-internal regex helper does the replacing.
        if (self::$SUPPORT['mbstring'] !== true) {
            $collapsed = self::regex_replace($str, '[[:space:]]+', ' ');

            return \trim($collapsed);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $collapsed = (string) \mb_ereg_replace('[[:space:]]+', ' ', $str);

        return \trim($collapsed);
    }
935
936
    /**
937
     * Returns count of characters used in a string.
938
     *
939
     * @param string $str                     <p>The input string.</p>
940
     * @param bool   $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
941
     * @param bool   $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use "mb_" functions.</p>
942
     *
943
     * @return int[] an associative array of Character as keys and
944
     *               their count as values
945
     */
946 19
    public static function count_chars(
        string $str,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // Split into single characters (chunk length 1) ...
        $single_characters = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

        // ... and count how often each of them occurs.
        return \array_count_values($single_characters);
    }
960
961
    /**
962
     * Remove css media-queries.
963
     *
964
     * @param string $str
965
     *
966
     * @return string
967
     */
968 1
    public static function css_stripe_media_queries(string $str): string
    {
        // Matches a whole "@media ... { ... }" block including its nested rules.
        $media_query_pattern = '#@media\\s+(?:only\\s)?(?:[\\s{(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $without_media_queries = \preg_replace($media_query_pattern, '', $str);

        // preg_replace() gives null on a regex error; the cast turns that into ''.
        return (string) $without_media_queries;
    }
976
977
    /**
978
     * Checks whether ctype is available on the server.
979
     *
980
     * @return bool
981
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
982
     */
983
    public static function ctype_loaded(): bool
    {
        // Ask the PHP runtime directly whether the extension is present.
        $is_loaded = \extension_loaded('ctype');

        return $is_loaded;
    }
987
988
    /**
989
     * Converts an int value into a UTF-8 character.
990
     *
991
     * @param mixed $int
992
     *
993
     * @return string
994
     */
995 19
    public static function decimal_to_chr($int): string
    {
        // Build the numeric HTML entity for the code point and let the
        // entity decoder produce the character.
        $numeric_entity = '&#' . $int . ';';

        return self::html_entity_decode($numeric_entity, \ENT_QUOTES | \ENT_HTML5);
    }
999
1000
    /**
1001
     * Decodes a MIME header field
1002
     *
1003
     * @param string $str
1004
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
1005
     *
1006
     * @return false|string
1007
     *                      A decoded MIME field on success,
1008
     *                      or false if an error occurs during the decoding
1009
     */
1010
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        // 'UTF-8' and 'CP850' are used as they are, every other name is normalized.
        $is_known_charset = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_charset) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (self::$SUPPORT['iconv'] !== true) {
            // Fallback without iconv: convert the input first when the
            // charset is not UTF-8, then decode with mbstring.
            if ($encoding !== 'UTF-8') {
                $str = self::encode($encoding, $str);
            }

            return \mb_decode_mimeheader($str);
        }

        return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
    }
1026
1027
    /**
1028
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
1029
     *
1030
     * @param string $str                            <p>The input string.</p>
1031
     * @param bool   $use_reversible_string_mappings [optional] <p>
1032
     *                                               When <b>TRUE</b>, we use a reversible string mapping
1033
     *                                               between "emoji_encode" and "emoji_decode".</p>
1034
     *
1035
     * @return string
1036
     */
1037 9
    public static function emoji_decode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        // The lookup tables must be initialized before they are read.
        self::initEmojiData();

        // Pick the key table that belongs to the requested mapping.
        $encoded_keys = $use_reversible_string_mappings
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        // Replace every encoded key with its emoji value.
        return (string) \str_replace(
            (array) $encoded_keys,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }
1057
1058
    /**
1059
     * Encode a string with emoji chars into a non-emoji string.
1060
     *
1061
     * @param string $str                            <p>The input string</p>
1062
     * @param bool   $use_reversible_string_mappings [optional] <p>
1063
     *                                               when <b>TRUE</b>, we use a reversible string mapping
1064
     *                                               between "emoji_encode" and "emoji_decode"</p>
1065
     *
1066
     * @return string
1067
     */
1068 12
    public static function emoji_encode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        // The lookup tables must be initialized before they are read.
        self::initEmojiData();

        // Pick the key table that belongs to the requested mapping.
        $encoded_keys = $use_reversible_string_mappings
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        // Replace every emoji value with its encoded key.
        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            (array) $encoded_keys,
            $str
        );
    }
1088
1089
    /**
1090
     * Encode a string with a new charset-encoding.
1091
     *
1092
     * INFO:  This function will also try to fix broken / double encoding,
1093
     *        so you can call this function also on a UTF-8 string and you don't mess up the string.
1094
     *
1095
     * @param string $to_encoding                   <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
1096
     * @param string $str                           <p>The input string</p>
1097
     * @param bool   $auto_detect_the_from_encoding [optional] <p>Force the new encoding (we try to fix broken / double
1098
     *                                              encoding for UTF-8)<br> otherwise we auto-detect the current
1099
     *                                              string-encoding</p>
1100
     * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
1101
     *                                              A empty string will trigger the autodetect anyway.</p>
1102
     *
1103
     * @return string
1104
     *
1105
     * @psalm-suppress InvalidReturnStatement
1106
     */
1107 28
    public static function encode(
        string $to_encoding,
        string $str,
        bool $auto_detect_the_from_encoding = true,
        string $from_encoding = ''
    ): string {
        // Without input or without a target charset there is nothing to convert.
        if ($to_encoding === '' || $str === '') {
            return $str;
        }

        // 'UTF-8' and 'CP850' are used as they are, every other name is normalized.
        if (!\in_array($to_encoding, ['UTF-8', 'CP850'], true)) {
            $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
        }

        if ($from_encoding && !\in_array($from_encoding, ['UTF-8', 'CP850'], true)) {
            $from_encoding = self::normalize_encoding($from_encoding, null);
        }

        // Source and target are known and identical.
        if ($to_encoding && $from_encoding && $from_encoding === $to_encoding) {
            return $str;
        }

        // Pseudo encodings: a matching target returns immediately, a matching
        // source is decoded and the real source charset is detected later.
        if ($to_encoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }
        if ($from_encoding === 'JSON') {
            $str = self::json_decode($str);
            $from_encoding = '';
        }

        if ($to_encoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($from_encoding === 'BASE64') {
            $str = \base64_decode($str, true);
            $from_encoding = '';
        }

        if ($to_encoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true, 'UTF-8');
        }
        if ($from_encoding === 'HTML-ENTITIES') {
            $str = self::html_entity_decode($str, \ENT_COMPAT, 'UTF-8');
            $from_encoding = '';
        }

        // Detect the source charset when requested or when none is known.
        $detected_encoding = false;
        if ($auto_detect_the_from_encoding === true || !$from_encoding) {
            $detected_encoding = self::str_detect_encoding($str);
        }

        if ($detected_encoding !== false) {
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $from_encoding = $detected_encoding;
        } elseif ($auto_detect_the_from_encoding === true) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        // Still no source charset, or it already equals the target.
        if (!$from_encoding || $from_encoding === $to_encoding) {
            return $str;
        }

        // Dedicated converters for the most common pairs.
        if (
            $to_encoding === 'UTF-8'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'ISO-8859-1'], true)
        ) {
            return self::to_utf8($str);
        }

        if (
            $to_encoding === 'ISO-8859-1'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'UTF-8'], true)
        ) {
            return self::to_iso8859($str);
        }

        // Any other target charset is announced as unsupported when mbstring is missing.
        if (
            !\in_array($to_encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // warning: do not use the symfony polyfill here
            $converted = \mb_convert_encoding($str, $to_encoding, $from_encoding);

            if ($converted) {
                return $converted;
            }
        }

        // Last attempt with iconv; a failed conversion hands back the input.
        $converted = \iconv($from_encoding, $to_encoding, $str);

        return $converted !== false ? $converted : $str;
    }
1248
1249
    /**
1250
     * @param string $str
1251
     * @param string $from_charset      [optional] <p>Set the input charset.</p>
1252
     * @param string $to_charset        [optional] <p>Set the output charset.</p>
1253
     * @param string $transfer_encoding [optional] <p>Set the transfer encoding.</p>
1254
     * @param string $linefeed          [optional] <p>Set the used linefeed.</p>
1255
     * @param int    $indent            [optional] <p>Set the max length indent.</p>
1256
     *
1257
     * @return false|string
1258
     *                      <p>An encoded MIME field on success,
1259
     *                      or false if an error occurs during the encoding.</p>
1260
     */
1261
    public static function encode_mimeheader(
        $str,
        $from_charset = 'UTF-8',
        $to_charset = 'UTF-8',
        $transfer_encoding = 'Q',
        $linefeed = "\r\n",
        $indent = 76
    ) {
        // The default line break is a real CRLF (double-quoted). The previous
        // single-quoted default was the four literal characters \r\n, which
        // were inserted as text wherever a long header was folded.

        // 'UTF-8' and 'CP850' are used as they are, every other name is normalized.
        if ($from_charset !== 'UTF-8' && $from_charset !== 'CP850') {
            $from_charset = self::normalize_encoding($from_charset, 'UTF-8');
        }

        if ($to_charset !== 'UTF-8' && $to_charset !== 'CP850') {
            $to_charset = self::normalize_encoding($to_charset, 'UTF-8');
        }

        // The field name is left empty; only the value is encoded.
        return \iconv_mime_encode(
            '',
            $str,
            [
                'scheme'           => $transfer_encoding,
                'line-length'      => $indent,
                'input-charset'    => $from_charset,
                'output-charset'   => $to_charset,
                'line-break-chars' => $linefeed,
            ]
        );
    }
1289
1290
    /**
1291
     * Create an extract from a sentence; if the search-string is found, the extract tries to center it in the output.
1292
     *
1293
     * @param string   $str                       <p>The input string.</p>
1294
     * @param string   $search                    <p>The searched string.</p>
1295
     * @param int|null $length                    [optional] <p>Default: null === text->length / 2</p>
1296
     * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
1297
     * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
1298
     *
1299
     * @return string
1300
     */
1301 1
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacer_for_skipped_text = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // UTF-8 goes straight to the "mb_" functions, every other charset
        // uses the class-internal string helpers.
        $is_utf8 = $encoding === 'UTF-8';

        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        // Default length: half of the text.
        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2, 0);
        }

        // Character count of a text.
        $count = static function ($text) use ($is_utf8, $encoding) {
            return (int) ($is_utf8 ? \mb_strlen($text) : self::strlen($text, $encoding));
        };

        // Part of the input text.
        $slice = static function ($start, $size) use ($str, $is_utf8, $encoding) {
            return $is_utf8
                ? \mb_substr($str, $start, $size)
                : self::substr($str, $start, $size, $encoding);
        };

        // min() of the next space and the next dot at or after $offset, cast
        // to int; a missing needle yields false and therefore 0.
        $next_break = static function ($offset) use ($str, $is_utf8, $encoding) {
            if ($is_utf8) {
                return (int) \min(
                    \mb_strpos($str, ' ', $offset),
                    \mb_strpos($str, '.', $offset)
                );
            }

            return (int) \min(
                self::strpos($str, ' ', $offset, $encoding),
                self::strpos($str, '.', $offset, $encoding)
            );
        };

        // No search term: cut the text at the first break after $length.
        if ($search === '') {
            $end = 0;
            if ($length > 0) {
                $end = \min($length - 1, $count($str));
            }

            $pos = $next_break($end);
            if (!$pos) {
                return $str;
            }

            $str_sub = $slice(0, $pos);
            if ($str_sub === false) {
                return '';
            }

            return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
        }

        // Where the search term starts and where the extract should start so
        // that the term ends up in the middle.
        $word_position = (int) (
            $is_utf8
                ? \mb_stripos($str, $search)
                : self::stripos($str, $search, 0, $encoding)
        );
        $half_side = (int) ($word_position - $length / 2 + $count($search) / 2);

        // Move the start back to the last break in front of that position.
        $pos_start = 0;
        if ($half_side > 0) {
            $half_text = $slice(0, $half_side);
            if ($half_text !== false) {
                $pos_start = $is_utf8
                    ? (int) \max(
                        \mb_strrpos($half_text, ' '),
                        \mb_strrpos($half_text, '.')
                    )
                    : (int) \max(
                        self::strrpos($half_text, ' ', 0, $encoding),
                        self::strrpos($half_text, '.', 0, $encoding)
                    );
            }
        }

        $total_length = (int) self::strlen($str, $encoding);

        if ($word_position && $half_side > 0) {
            // The extract starts inside the text.
            $offset = \min($pos_start + $length - 1, $total_length);
            $pos_end = $next_break($offset) - $pos_start;

            if ($pos_end > 0) {
                // Text is skipped on both sides.
                $str_sub = $slice($pos_start, $pos_end);

                return $str_sub !== false
                    ? $replacer_for_skipped_text . \trim($str_sub, $trim_chars) . $replacer_for_skipped_text
                    : '';
            }

            // No break behind the offset: keep everything up to the end.
            $str_sub = $slice($pos_start, $count($str));

            return $str_sub !== false
                ? $replacer_for_skipped_text . \ltrim($str_sub, $trim_chars)
                : '';
        }

        // The extract starts at the beginning of the text.
        $pos_end = $next_break(\min($length - 1, $total_length));
        if (!$pos_end) {
            return $str;
        }

        $str_sub = $slice(0, $pos_end);

        return $str_sub !== false
            ? \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text
            : '';
    }
1477
1478
    /**
     * Reads an entire file into a string and, on request, converts the content to UTF-8.
     *
     * WARNING: Do not use the UTF-8 option ($convert_to_utf8) for binary files (e.g.: images) !!!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read. The value is passed through
     *                                        filter_var(..., FILTER_SANITIZE_STRING) before it is used.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Forwarded unchanged to the native file_get_contents().
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with stream_context_create().
     *                                        If it is null and $timeout is non-zero, a context that only
     *                                        carries the "http" timeout is created.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts, null is treated as 0.
     *                                        </p>
     * @param int|null      $max_length       [optional] <p>
     *                                        Maximum length of data read. With null the file is read until
     *                                        the end of file is reached.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
     * @param bool          $convert_to_utf8  <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Data that
     *                                        is detected as binary is only converted if it is also detected as
     *                                        UTF-16 or UTF-32.</p>
     * @param string        $from_encoding    [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        A empty string will trigger the autodetect anyway.</p>
     *
     * @return false|string
     *                      <p>The function returns the read data as string or <b>false</b> on failure.</p>
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $max_length = null,
        int $timeout = 10,
        bool $convert_to_utf8 = true,
        string $from_encoding = ''
    ) {
        // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 and HTML-encodes
        // quotes in the path - confirm whether this sanitizing step is still wanted.
        $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);
        /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
        if ($filename === false) {
            return false;
        }

        // Only build a default context when the caller supplied none.
        if ($context === null && $timeout) {
            $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
        }

        $offset = $offset ?? 0;

        // The length argument is only passed when the caller actually set one.
        $data = \is_int($max_length)
            ? \file_get_contents($filename, $use_include_path, $context, $offset, $max_length)
            : \file_get_contents($filename, $use_include_path, $context, $offset);

        // return false on error
        if ($data === false) {
            return false;
        }

        if ($convert_to_utf8 !== true) {
            return $data;
        }

        // Non-binary data is always converted; binary data only if it looks like UTF-16 / UTF-32.
        $needs_conversion = self::is_binary($data, true) !== true
                            ||
                            self::is_utf16($data, false) !== false
                            ||
                            self::is_utf32($data, false) !== false;

        if ($needs_conversion) {
            $data = self::cleanup(
                self::encode('UTF-8', $data, false, $from_encoding)
            );
        }

        return $data;
    }
1574
1575
    /**
     * Tells whether the content of a file begins with a BOM (Byte Order Mark).
     *
     * The whole file is read with the native file_get_contents() and the
     * content is handed to self::string_has_bom().
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p>
     */
    public static function file_has_bom(string $file_path): bool
    {
        $contents = \file_get_contents($file_path);

        if ($contents !== false) {
            return self::string_has_bom($contents);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1594
1595
    /**
     * Normalizes strings (and, recursively, the members of arrays / objects) to the
     * requested Unicode normalization form.
     *
     * Per the original documentation this normalizes to UTF-8 NFC, converting from
     * WINDOWS-1252 when needed: if \Normalizer::normalize() yields no usable result,
     * the string is passed to self::encode('UTF-8', ...) instead.
     *
     * Values that are neither array, object nor string are returned unchanged.
     *
     * @param mixed  $var                <p>The value to filter.</p>
     * @param int    $normalization_form <p>A \Normalizer form constant, default: \Normalizer::NFC.</p>
     * @param string $leading_combining  <p>Prefix that is prepended when the result starts with a
     *                                   combining mark (\p{Mn}); an empty string disables this.</p>
     *
     * @return mixed
     */
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        $type = \gettype($var);

        // Containers: filter every member in place and hand the container back.
        if ($type === 'array' || $type === 'object') {
            /** @noinspection ForeachSourceInspection */
            foreach ($var as &$member) {
                $member = self::filter($member, $normalization_form, $leading_combining);
            }
            unset($member);

            return $var;
        }

        if ($type !== 'string') {
            return $var;
        }

        if (\strpos($var, "\r") !== false) {
            // Workaround https://bugs.php.net/65732
            $var = self::normalize_line_ending($var);
        }

        // Pure ASCII needs no normalization at all.
        if (ASCII::is_ascii($var) !== false) {
            return $var;
        }

        // '-' is only a non-empty marker meaning "input was already normalized".
        $normalized = '-';
        if (!\Normalizer::isNormalized($var, $normalization_form)) {
            $normalized = \Normalizer::normalize($var, $normalization_form);

            $var = isset($normalized[0])
                ? $normalized
                : self::encode('UTF-8', $var, true);
        }

        if (
            $var[0] >= "\x80"
            &&
            isset($normalized[0], $leading_combining[0])
            &&
            \preg_match('/^\\p{Mn}/u', $var)
        ) {
            // Prevent leading combining chars
            // for NFC-safe concatenations.
            $var = $leading_combining . $var;
        }

        return $var;
    }
1664
1665
    /**
     * "filter_input()"-wrapper whose result is additionally passed through self::filter().
     *
     * Gets a specific external variable by name and optionally filters it.
     *
     * @see http://php.net/manual/en/function.filter-input.php
     *
     * @param int    $type          <p>
     *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                              <b>INPUT_ENV</b>.
     *                              </p>
     * @param string $variable_name <p>
     *                              Name of a variable to get.
     *                              </p>
     * @param int    $filter        [optional] <p>
     *                              The ID of the filter to apply. The
     *                              manual page lists the available filters.
     *                              </p>
     * @param mixed  $options       [optional] <p>
     *                              Associative array of options or bitwise disjunction of flags. If filter
     *                              accepts options, flags can be provided in "flags" field of array.
     *                              A null value is not forwarded to the native function.
     *                              </p>
     *
     * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     */
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // Forward $options only if the caller passed a non-null fourth argument.
        $forward_options = $options !== null && \func_num_args() >= 4;

        $raw = $forward_options
            ? \filter_input($type, $variable_name, $filter, $options)
            : \filter_input($type, $variable_name, $filter);

        return self::filter($raw);
    }
1707
1708
    /**
     * "filter_input_array()"-wrapper whose result is additionally passed through self::filter().
     *
     * Gets external variables and optionally filters them.
     *
     * @see http://php.net/manual/en/function.filter-input-array.php
     *
     * @param int   $type       <p>
     *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                          <b>INPUT_ENV</b>.
     *                          </p>
     * @param mixed $definition [optional] <p>
     *                          An array defining the arguments. A valid key is a string
     *                          containing a variable name and a valid value is either a filter type, or an array
     *                          optionally specifying the filter, flags and options. If the value is an
     *                          array, valid keys are "filter" (the filter type), "flags" (flags that apply
     *                          to the filter) and "options" (options that apply to the filter).
     *                          </p>
     *                          <p>
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
     *                          input array are filtered by this filter. A null value is not forwarded to the
     *                          native function.
     *                          </p>
     * @param bool  $add_empty  [optional] <p>
     *                          Add missing keys as <b>NULL</b> to the return value.
     *                          Only forwarded together with a non-null $definition.
     *                          </p>
     *
     * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
     *               is not set and <b>NULL</b> if the filter fails.
     */
    public static function filter_input_array(
        int $type,
        $definition = null,
        bool $add_empty = true
    ) {
        // Forward the definition only if the caller passed a non-null second argument.
        $forward_definition = $definition !== null && \func_num_args() >= 2;

        $values = $forward_definition
            ? \filter_input_array($type, $definition, $add_empty)
            : \filter_input_array($type);

        return self::filter($values);
    }
1756
1757
    /**
     * "filter_var()"-wrapper whose result is additionally passed through self::filter().
     *
     * Filters a variable with a specified filter.
     *
     * @see http://php.net/manual/en/function.filter-var.php
     *
     * @param mixed $variable <p>
     *                        Value to filter.
     *                        </p>
     * @param int   $filter   [optional] <p>
     *                        The ID of the filter to apply. The
     *                        manual page lists the available filters.
     *                        </p>
     * @param mixed $options  [optional] <p>
     *                        Associative array of options or bitwise disjunction of flags. If filter
     *                        accepts options, flags can be provided in "flags" field of array. For
     *                        the "callback" filter, callable type should be passed. The
     *                        callback must accept one argument, the value to be filtered, and return
     *                        the value after filtering/sanitizing it.
     *                        This argument is only forwarded when it was passed explicitly.
     *                        </p>
     *                        <p>
     *                        <code>
     *                        // for filters that accept options, use this format
     *                        $options = array(
     *                        'options' => array(
     *                        'default' => 3, // value to return if the filter fails
     *                        'min_range' => 0
     *                        ),
     *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
     *                        );
     *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
     *                        // for filter that only accept flags, you can pass them directly
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
     *                        // for filter that only accept flags, you can also pass as an array
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
     *                        array('flags' => FILTER_NULL_ON_FAILURE));
     *                        // callback filter: 'foo' receives the value and returns the filtered value
     *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
     *                        </code>
     *                        </p>
     *
     * @return mixed the filtered data, or <b>FALSE</b> if the filter fails
     */
    public static function filter_var(
        $variable,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // The decision depends on the argument count, so an explicit null is forwarded as well.
        $filtered = \func_num_args() >= 3
            ? \filter_var($variable, $filter, $options)
            : \filter_var($variable, $filter);

        return self::filter($filtered);
    }
1828
1829
    /**
     * "filter_var_array()"-wrapper whose result is additionally passed through self::filter().
     *
     * Gets multiple variables and optionally filters them.
     *
     * @see http://php.net/manual/en/function.filter-var-array.php
     *
     * @param array $data       <p>
     *                          An array with string keys containing the data to filter.
     *                          </p>
     * @param mixed $definition [optional] <p>
     *                          An array defining the arguments. A valid key is a string
     *                          containing a variable name and a valid value is either a
     *                          filter type, or an array optionally specifying the filter, flags and options.
     *                          If the value is an array, valid keys are "filter" (the filter type),
     *                          "flags" (flags that apply to the filter) and "options" (options that
     *                          apply to the filter).
     *                          </p>
     *                          <p>
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
     *                          input array are filtered by this filter. It is only forwarded when it was
     *                          passed explicitly.
     *                          </p>
     * @param bool  $add_empty  [optional] <p>
     *                          Add missing keys as <b>NULL</b> to the return value.
     *                          Only forwarded together with an explicitly passed $definition.
     *                          </p>
     *
     * @return mixed an array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set
     */
    public static function filter_var_array(
        array $data,
        $definition = null,
        bool $add_empty = true
    ) {
        // The decision depends on the argument count, so an explicit null is forwarded as well.
        $filtered = \func_num_args() >= 2
            ? \filter_var_array($data, $definition, $add_empty)
            : \filter_var_array($data);

        return self::filter($filtered);
    }
1875
1876
    /**
     * Reports whether the "finfo" class exists in the running PHP environment.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function finfo_loaded(): bool
    {
        $finfo_available = \class_exists('finfo');

        return $finfo_available;
    }
1886
1887
    /**
     * Returns the first $n characters of the string.
     *
     * An empty input string or a non-positive $n yields an empty string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function.
     *                         For 'UTF-8' the native mb_substr() is called without an explicit
     *                         encoding argument, otherwise self::substr() is used.</p>
     *
     * @return string
     */
    public static function first_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        if ($n <= 0 || $str === '') {
            return '';
        }

        $head = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $head;
    }
1911
1912
    /**
     * Checks that the string length, as reported by self::strlen(), does not exceed $box_size.
     *
     * @param string $str      the original string to be checked
     * @param int    $box_size the size in number of chars to be checked against string
     *
     * @return bool true if string is less than or equal to $box_size, false otherwise
     */
    public static function fits_inside(string $str, int $box_size): bool
    {
        // The result of strlen() is cast to int before the comparison.
        $char_count = (int) self::strlen($str);

        return $box_size >= $char_count;
    }
1924
1925
    /**
     * Try to fix simple broken UTF-8 strings.
     *
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
     *
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * The replacement is a plain str_replace() driven by the "utf8_fix" data table; the
     * search / replace lists are built once and cached in static variables.
     *
     * @param string $str <p>The input string</p>
     *
     * @return string
     */
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        static $broken_sequences = null;
        static $fixed_sequences = null;

        if ($broken_sequences === null) {
            // Load the mapping table on first use only.
            self::$BROKEN_UTF8_FIX = self::$BROKEN_UTF8_FIX ?? self::getData('utf8_fix');

            $broken_sequences = \array_keys(self::$BROKEN_UTF8_FIX);
            $fixed_sequences = \array_values(self::$BROKEN_UTF8_FIX);
        }

        return \str_replace($broken_sequences, $fixed_sequences, $str);
    }
1958
1959
    /**
     * Fix a double (or multiple) encoded UTF8 string.
     *
     * A string is repeatedly run through self::utf8_decode() + self::to_utf8()
     * until a pass no longer changes it. Arrays are processed element by element
     * (recursively), keeping their keys.
     *
     * @param string|string[] $str you can use a string or an array of strings
     *
     * @return string|string[]
     *                         Will return the fixed input-"array" or
     *                         the fixed input-"string"
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function fix_utf8($str)
    {
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::fix_utf8($value);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $str = (string) $str;

        // Loop until a decode / encode round-trip leaves the string untouched.
        for ($previous = ''; $previous !== $str;) {
            $previous = $str;
            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::to_utf8(
                self::utf8_decode($str, true)
            );
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $str;
    }
2001
2002
    /**
     * Get the text direction of a specific character.
     *
     * If "intlChar" support is flagged in self::$SUPPORT, the result of
     * \IntlChar::charDirection() is checked first. When that gives no LTR / RTL
     * decision (or the extension is not available) the code point, as returned by
     * chr_to_decimal(), is looked up in hard-coded right-to-left ranges.
     *
     * @param string $char
     *
     * @return string 'RTL' or 'LTR'
     */
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_direction = \IntlChar::charDirection($char);

            // direction codes from "IntlChar"-Class
            if (\in_array($intl_direction, [0, 11, 12, 20], true)) {
                return 'LTR';
            }

            if (\in_array($intl_direction, [1, 13, 14, 15, 21], true)) {
                return 'RTL';
            }
        }

        $c = static::chr_to_decimal($char);

        // Everything outside of this window is treated as left-to-right.
        if ($c < 0x5be || $c > 0x10b7f) {
            return 'LTR';
        }

        // Table entries: an int is a single code point (strict match),
        // a two element array is an inclusive [from, to] range.
        $is_listed = static function ($code_point, array $table): bool {
            foreach ($table as $entry) {
                if (\is_array($entry)) {
                    if ($code_point >= $entry[0] && $code_point <= $entry[1]) {
                        return true;
                    }
                } elseif ($code_point === $entry) {
                    return true;
                }
            }

            return false;
        };

        if ($c <= 0x85e) {
            $rtl_low = [
                0x5be,
                0x5c0,
                0x5c3,
                0x5c6,
                [0x5d0, 0x5ea],
                [0x5f0, 0x5f4],
                0x608,
                0x60b,
                0x60d,
                0x61b,
                [0x61e, 0x64a],
                [0x66d, 0x66f],
                [0x671, 0x6d5],
                [0x6e5, 0x6e6],
                [0x6ee, 0x6ef],
                [0x6fa, 0x70d],
                0x710,
                [0x712, 0x72f],
                [0x74d, 0x7a5],
                0x7b1,
                [0x7c0, 0x7ea],
                [0x7f4, 0x7f5],
                0x7fa,
                [0x800, 0x815],
                0x81a,
                0x824,
                0x828,
                [0x830, 0x83e],
                [0x840, 0x858],
                0x85e,
            ];

            return $is_listed($c, $rtl_low) ? 'RTL' : 'LTR';
        }

        if ($c === 0x200f) {
            return 'RTL';
        }

        if ($c >= 0xfb1d) {
            $rtl_high = [
                0xfb1d,
                [0xfb1f, 0xfb28],
                [0xfb2a, 0xfb36],
                [0xfb38, 0xfb3c],
                0xfb3e,
                [0xfb40, 0xfb41],
                [0xfb43, 0xfb44],
                [0xfb46, 0xfbc1],
                [0xfbd3, 0xfd3d],
                [0xfd50, 0xfd8f],
                [0xfd92, 0xfdc7],
                [0xfdf0, 0xfdfc],
                [0xfe70, 0xfe74],
                [0xfe76, 0xfefc],
                [0x10800, 0x10805],
                0x10808,
                [0x1080a, 0x10835],
                [0x10837, 0x10838],
                0x1083c,
                [0x1083f, 0x10855],
                [0x10857, 0x1085f],
                [0x10900, 0x1091b],
                [0x10920, 0x10939],
                0x1093f,
                0x10a00,
                [0x10a10, 0x10a13],
                [0x10a15, 0x10a17],
                [0x10a19, 0x10a33],
                [0x10a40, 0x10a47],
                [0x10a50, 0x10a58],
                [0x10a60, 0x10a7f],
                [0x10b00, 0x10b35],
                [0x10b40, 0x10b55],
                [0x10b58, 0x10b72],
                [0x10b78, 0x10b7f],
            ];

            if ($is_listed($c, $rtl_high)) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2115
2116
    /**
     * Check for php-support.
     *
     * When a key is requested, the transliterator list is loaded on demand and
     * stored in the support-"array" under "intl__transliterator_list_ids" before
     * the lookup happens. This does not happen for $key === null.
     *
     * @param string|null $key
     *
     * @return mixed
     *               Return the full support-"array", if $key === null<br>
     *               return bool-value, if $key is used and available<br>
     *               otherwise return <strong>null</strong>
     */
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            // compatibility fix for old versions
            self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

            return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
        }

        return self::$SUPPORT;
    }
2140
2141
    /**
     * Guesses a file type from the first two bytes of the given data.
     *
     * Warning: this method only works for some file-types (png, jpg)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * The decimal values of the first two bytes are concatenated and compared
     * against the known signatures: 255 + 216 (0xFF 0xD8) for jpg and
     * 137 + 80 (0x89 0x50) for png. Input that is shorter than two bytes or
     * that matches no signature yields $fallback.
     *
     * @see https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
     *
     * @param string $str
     * @param array  $fallback <p>with this keys: 'ext', 'mime', 'type'</p>
     *
     * @return array<string, string|null>
     *                       <p>with this keys: 'ext', 'mime', 'type'</p>
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        // Two bytes are needed to build the type code.
        if (\strlen($str) < 2) {
            return $fallback;
        }

        // e.g. "\x89P" => 137 . 80 => 13780
        $type_code = (int) (\ord($str[0]) . \ord($str[1]));

        // WARNING: do not add too simple comparisons, because of false-positive results:
        //
        // 3780 => 'pdf', 7790 => 'exe', 7784 => 'midi', 8075 => 'zip',
        // 8297 => 'rar', 7173 => 'gif', 7373 => 'tiff' 6677 => 'bmp', ...
        //
        $known_types = [
            255216 => [
                'ext'  => 'jpg',
                'mime' => 'image/jpeg',
                'type' => 'binary',
            ],
            13780 => [
                'ext'  => 'png',
                'mime' => 'image/png',
                'type' => 'binary',
            ],
        ];

        return $known_types[$type_code] ?? $fallback;
    }
2214
2215
    /**
     * Build a random string by drawing single characters from a pool of characters.
     *
     * Every position is filled with the character found at a random offset of
     * $possible_chars. The offset comes from "random_int()"; if that call throws,
     * "mt_rand()" is used for this pick instead.
     *
     * @param int    $length         <p>Number of characters to generate.</p>
     * @param string $possible_chars [optional] <p>Pool of characters used for the random selection.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The random string, or an empty string if the pool is empty.</p>
     */
    public static function get_random_string(
        int $length,
        string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        string $encoding = 'UTF-8'
    ): string {
        // Decided before normalization: only a literal "UTF-8" takes the plain "mb_" path.
        $use_mb_functions = $encoding === 'UTF-8';

        if ($use_mb_functions) {
            $pool_size = (int) \mb_strlen($possible_chars);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $pool_size = (int) self::strlen($possible_chars, $encoding);
        }

        // nothing to pick from
        if ($pool_size === 0) {
            return '';
        }

        $last_offset = $pool_size - 1;
        $result = '';
        $picked = 0;

        while ($picked < $length) {
            try {
                $offset = \random_int(0, $last_offset);
            } catch (\Exception $e) {
                /** @noinspection RandomApiMigrationInspection */
                $offset = \mt_rand(0, $last_offset);
            }

            $char = $use_mb_functions
                ? \mb_substr($possible_chars, $offset, 1)
                : self::substr($possible_chars, $offset, 1, $encoding);

            // a failed extraction does not count, the position is drawn again
            if ($char === false) {
                continue;
            }

            $result .= $char;
            ++$picked;
        }

        return $result;
    }
2279
2280
    /**
     * Create a unique identifier string.
     *
     * The identifier is produced by "uniqid()" (with more entropy enabled), prefixed
     * with a helper string made of a random integer, the current session id, the
     * remote / server address (if present in $_SERVER) and the caller's extra value.
     *
     * INFO: the result is meant to be unique, it is not a replacement for a
     *       cryptographically secure token.
     *
     * @param int|string $entropy_extra [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $use_md5       [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     *                <p>A 32 character md5-hash if $use_md5 is true, the raw "uniqid()" result otherwise.</p>
     */
    public static function get_unique_string($entropy_extra = '', bool $use_md5 = true): string
    {
        // "random_int()" throws if no source of randomness is available,
        // fall back to "mt_rand()" the same way "UTF8::get_random_string()" does.
        try {
            $rand_int = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            /** @noinspection RandomApiMigrationInspection */
            $rand_int = \mt_rand(0, \mt_getrandmax());
        }

        $unique_helper = $rand_int .
                         \session_id() .
                         ($_SERVER['REMOTE_ADDR'] ?? '') .
                         ($_SERVER['SERVER_ADDR'] ?? '') .
                         $entropy_extra;

        $unique_string = \uniqid($unique_helper, true);

        if ($use_md5) {
            // hash "uniqid" + helper, so the output has a fixed length and charset
            $unique_string = \md5($unique_string . $unique_helper);
        }

        return $unique_string;
    }
2302
2303
    /**
     * Deprecated alias, forwards the call to "UTF8::string_has_bom()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>The result of "UTF8::string_has_bom()".</p>
     *
     * @see UTF8::string_has_bom()
     * @deprecated <p>please use "UTF8::string_has_bom()"</p>
     */
    public static function hasBom(string $str): bool
    {
        $has_bom = self::string_has_bom($str);

        return $has_bom;
    }
2317
2318
    /**
     * Check if the string contains at least one lower case character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p><strong>true</strong> if a lower case character was found, <strong>false</strong> otherwise.</p>
     */
    public static function has_lowercase(string $str): bool
    {
        // The leading ".*" lets the lower case char appear anywhere in the string.
        $pattern = '.*[[:lower:]]';

        // without mbstring: use the pattern helper of this class
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2335
2336
    /**
     * Check if the string contains at least one upper case character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p><strong>true</strong> if an upper case character was found, <strong>false</strong> otherwise.</p>
     */
    public static function has_uppercase(string $str): bool
    {
        // The leading ".*" lets the upper case char appear anywhere in the string.
        $pattern = '.*[[:upper:]]';

        // without mbstring: use the pattern helper of this class
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2352
2353
    /**
     * Turn a hexadecimal code point value into the matching UTF-8 character.
     *
     * The value is converted with "hexdec()" and then handed to "UTF8::decimal_to_chr()".
     *
     * @param string $hexdec <p>The hexadecimal value.</p>
     *
     * @return false|string one single UTF-8 character
     */
    public static function hex_to_chr(string $hexdec)
    {
        $code_point = \hexdec($hexdec);

        return self::decimal_to_chr($code_point);
    }
2364
2365
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * Accepted are 4 to 6 hexadecimal digits, optionally prefixed with "\u" or "U+",
     * e.g. "U+00f1", "\u00f1" or "00f1".
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * @param string $hexdec <p>The hexadecimal code point representation.</p>
     *
     * @return false|int the code point, or false on failure
     */
    public static function hex_to_int($hexdec)
    {
        // init
        $hexdec = (string) $hexdec;

        if ($hexdec === '') {
            return false;
        }

        // Only real hex digits are allowed in the capture group: letters like "z"
        // would otherwise pass the check and "intval()" would silently return 0.
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexdec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2389
2390
    /**
     * Deprecated alias, forwards the call to "UTF8::html_entity_decode()".
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $flags    [optional] <p>Passed through unchanged.</p>
     * @param string $encoding [optional] <p>Passed through unchanged.</p>
     *
     * @return string
     *                <p>The result of "UTF8::html_entity_decode()".</p>
     *
     * @see UTF8::html_entity_decode()
     * @deprecated <p>please use "UTF8::html_entity_decode()"</p>
     */
    public static function html_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2409
2410
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * With mbstring the work is done by "mb_encode_numericentity()", otherwise every
     * character is encoded via "UTF8::single_chr_html_encode()".
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * @param string $str              <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keep_ascii_chars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(
        string $str,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // keep ASCII: start the converted range behind the ASCII block
            $start_code = $keep_ascii_chars === true ? 0x80 : 0x00;

            // convmap layout: [first code point, last code point, offset, mask]
            // The map must contain a multiple of 4 elements, PHP >= 8.0 throws a
            // ValueError otherwise.
            $convmap = [$start_code, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                /** @var false|string|null $return - needed for PhpStan (stubs error) */
                $return = \mb_encode_numericentity($str, $convmap);
                if ($return !== null && $return !== false) {
                    return $return;
                }
            }

            /** @var false|string|null $return - needed for PhpStan (stubs error) */
            $return = \mb_encode_numericentity($str, $convmap, $encoding);
            if ($return !== null && $return !== false) {
                return $return;
            }
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keep_ascii_chars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2477
2478
    /**
     * UTF-8 version of html_entity_decode()
     *
     * PHP's own html_entity_decode() does not convert entities that lack the
     * trailing semicolon, although most browsers still interpret them. Numeric
     * entities without semicolon are therefore completed first, and the decoding
     * is repeated until the string does not change anymore.
     *
     * Convert all HTML entities to their applicable characters
     *
     * INFO: opposite to UTF8::html_encode()
     *
     * @see http://php.net/manual/en/function.html-entity-decode.php
     *
     * @param string $str      <p>
     *                         The input string.
     *                         </p>
     * @param int    $flags    [optional] <p>
     *                         A bitmask of one or more of the following flags, which specify how to handle quotes
     *                         and which document type to use. If null is given, ENT_QUOTES | ENT_HTML5 is used.
     *                         <ul>
     *                         <li><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes alone.</li>
     *                         <li><b>ENT_QUOTES</b>: Will convert both double and single quotes.</li>
     *                         <li><b>ENT_NOQUOTES</b>: Will leave both double and single quotes unconverted.</li>
     *                         <li><b>ENT_HTML401</b>: Handle code as HTML 4.01.</li>
     *                         <li><b>ENT_XML1</b>: Handle code as XML 1.</li>
     *                         <li><b>ENT_XHTML</b>: Handle code as XHTML.</li>
     *                         <li><b>ENT_HTML5</b>: Handle code as HTML 5.</li>
     *                         </ul>
     *                         </p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the decoded string
     */
    public static function html_entity_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // shorter than 4 bytes (examples: &; || &x;) or no "&" at all: nothing to decode
        if (!isset($str[3]) || \strpos($str, '&') === false) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $flags = $flags ?? (\ENT_QUOTES | \ENT_HTML5);

        if (
            !\in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // decode again and again, until a pass leaves the string untouched
        do {
            $previous = $str;

            // no "&" left: this pass would not change anything
            if (\strpos($str, '&') === false) {
                break;
            }

            if (\strpos($str, '&#') !== false) {
                // add the missing ";" to numeric (decimal & hex) entities
                $str = (string) \preg_replace(
                    '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                    '$1;',
                    $str
                );
            }

            $str = \html_entity_decode($str, $flags, $encoding);
        } while ($previous !== $str);

        return $str;
    }
2603
2604
    /**
     * Escape a string for html output via "UTF8::htmlspecialchars()".
     *
     * Double and single quotes are converted (ENT_QUOTES) and invalid code unit
     * sequences are substituted (ENT_SUBSTITUTE).
     *
     * @param string $str      <p>The string being escaped.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The escaped string.</p>
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }
2620
2621
    /**
     * Remove empty html-tag.
     *
     * e.g.: <tag></tag>
     *
     * An opening tag that is directly followed (optional whitespace in between)
     * by a closing tag is removed. INFO: the tag names are not compared.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *                <p>The string without empty tags, or the unchanged input if the regex could not be applied.</p>
     */
    public static function html_stripe_empty_tags(string $str): string
    {
        $stripped = \preg_replace(
            '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u',
            '',
            $str
        );

        // "preg_replace()" returns null on errors (e.g. invalid UTF-8 in combination
        // with the "u" modifier): keep the input instead of returning an empty string.
        return $stripped ?? $str;
    }
2638
2639
    /**
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
     *
     * On top of PHP's htmlentities() every backslash is replaced by "&#92;" and the
     * result is passed through "UTF8::html_encode()" (ASCII chars are kept).
     *
     * @see http://php.net/manual/en/function.htmlentities.php
     *
     * @param string $str           <p>
     *                              The input string.
     *                              </p>
     * @param int    $flags         [optional] <p>
     *                              A bitmask of one or more of the following flags, which specify how to handle
     *                              quotes, invalid code unit sequences and the used document type. The default is
     *                              ENT_COMPAT.
     *                              <ul>
     *                              <li><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes
     *                              alone.</li>
     *                              <li><b>ENT_QUOTES</b>: Will convert both double and single quotes.</li>
     *                              <li><b>ENT_NOQUOTES</b>: Will leave both double and single quotes
     *                              unconverted.</li>
     *                              <li><b>ENT_IGNORE</b>: Silently discard invalid code unit sequences instead of
     *                              returning an empty string. Using this flag is discouraged as it may have
     *                              security implications.</li>
     *                              <li><b>ENT_SUBSTITUTE</b>: Replace invalid code unit sequences with a Unicode
     *                              Replacement Character U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of
     *                              returning an empty string.</li>
     *                              <li><b>ENT_DISALLOWED</b>: Replace invalid code points for the given document
     *                              type with a Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
     *                              (otherwise) instead of leaving them as is. This may be useful, for instance,
     *                              to ensure the well-formedness of XML documents with embedded external
     *                              content.</li>
     *                              <li><b>ENT_HTML401</b>: Handle code as HTML 4.01.</li>
     *                              <li><b>ENT_XML1</b>: Handle code as XML 1.</li>
     *                              <li><b>ENT_XHTML</b>: Handle code as XHTML.</li>
     *                              <li><b>ENT_HTML5</b>: Handle code as HTML 5.</li>
     *                              </ul>
     *                              </p>
     * @param string $encoding      [optional] <p>
     *                              Like <b>htmlspecialchars</b>,
     *                              <b>htmlentities</b> takes an optional third argument
     *                              <i>encoding</i> which defines encoding used in
     *                              conversion.
     *                              Although this argument is technically optional, you are highly
     *                              encouraged to specify the correct value for your code.
     *                              </p>
     * @param bool   $double_encode [optional] <p>
     *                              When <i>double_encode</i> is turned off PHP will not
     *                              encode existing html entities. The default is to convert everything.
     *                              </p>
     *
     * @return string
     *                <p>
     *                The encoded string.
     *                <br><br>
     *                If the input <i>string</i> contains an invalid code unit
     *                sequence within the given <i>encoding</i> an empty string
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
     *                <b>ENT_SUBSTITUTE</b> flags are set.
     *                </p>
     */
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        /**
         * PHP does not convert a backslash to its html entity, since it is mostly
         * used to escape characters when inserting into a database. A proper database
         * layer makes that unnecessary, so every backslash is replaced by its html
         * entity equivalent here.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace('\\', '&#92;', $encoded);

        // encode the remaining non-ASCII chars as numbered entities
        return self::html_encode($encoded, true, $encoding);
    }
2773
2774
    /**
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
     *
     * INFO: Take a look at "UTF8::htmlentities()"
     *
     * @see http://php.net/manual/en/function.htmlspecialchars.php
     *
     * @param string $str           <p>
     *                              The string being converted.
     *                              </p>
     * @param int    $flags         [optional] <p>
     *                              A bitmask of one or more of the following flags, which specify how to handle
     *                              quotes, invalid code unit sequences and the used document type. The default is
     *                              ENT_COMPAT.
     *                              <ul>
     *                              <li><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes
     *                              alone.</li>
     *                              <li><b>ENT_QUOTES</b>: Will convert both double and single quotes.</li>
     *                              <li><b>ENT_NOQUOTES</b>: Will leave both double and single quotes
     *                              unconverted.</li>
     *                              <li><b>ENT_IGNORE</b>: Silently discard invalid code unit sequences instead of
     *                              returning an empty string. Using this flag is discouraged as it may have
     *                              security implications.</li>
     *                              <li><b>ENT_SUBSTITUTE</b>: Replace invalid code unit sequences with a Unicode
     *                              Replacement Character U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of
     *                              returning an empty string.</li>
     *                              <li><b>ENT_DISALLOWED</b>: Replace invalid code points for the given document
     *                              type with a Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
     *                              (otherwise) instead of leaving them as is. This may be useful, for instance,
     *                              to ensure the well-formedness of XML documents with embedded external
     *                              content.</li>
     *                              <li><b>ENT_HTML401</b>: Handle code as HTML 4.01.</li>
     *                              <li><b>ENT_XML1</b>: Handle code as XML 1.</li>
     *                              <li><b>ENT_XHTML</b>: Handle code as XHTML.</li>
     *                              <li><b>ENT_HTML5</b>: Handle code as HTML 5.</li>
     *                              </ul>
     *                              </p>
     * @param string $encoding      [optional] <p>
     *                              Defines encoding used in conversion.
     *                              </p>
     *                              <p>
     *                              For the purposes of this function, the encodings
     *                              ISO-8859-1, ISO-8859-15,
     *                              UTF-8, cp866,
     *                              cp1251, cp1252, and
     *                              KOI8-R are effectively equivalent, provided the
     *                              <i>string</i> itself is valid for the encoding, as
     *                              the characters affected by <b>htmlspecialchars</b> occupy
     *                              the same positions in all of these encodings.
     *                              </p>
     * @param bool   $double_encode [optional] <p>
     *                              When <i>double_encode</i> is turned off PHP will not
     *                              encode existing html entities, the default is to convert everything.
     *                              </p>
     *
     * @return string
     *                <p>
     *                The converted string.
     *                <br><br>
     *                If the input <i>string</i> contains an invalid code unit
     *                sequence within the given <i>encoding</i> an empty string
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
     *                <b>ENT_SUBSTITUTE</b> flags are set.
     *                </p>
     */
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are passed on as they are, everything else gets normalized first
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        return \htmlspecialchars($str, $flags, $encoding, $double_encode);
    }
2901
2902
    /**
     * Tells whether the "iconv" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function iconv_loaded(): bool
    {
        $is_loaded = \extension_loaded('iconv');

        return $is_loaded;
    }
2912
2913
    /**
     * Deprecated alias that forwards to "UTF8::decimal_to_chr()".
     *
     * @param mixed $int <p>Handed over unchanged to "UTF8::decimal_to_chr()".</p>
     *
     * @return string
     *
     * @see UTF8::decimal_to_chr()
     * @deprecated <p>please use "UTF8::decimal_to_chr()"</p>
     */
    public static function int_to_chr($int): string
    {
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
2927
2928
    /**
     * Converts an integer to its hexadecimal code point notation, e.g. "U+0041".
     *
     * The hex digits come from dechex() (lower case) and are left-padded with
     * zeros to at least four digits; longer values are kept as they are.
     *
     * INFO: opposite to UTF8::hex_to_int()
     *
     * @param int    $int    <p>The integer to be converted to hexadecimal code point.</p>
     * @param string $prefix [optional] <p>Text placed in front of the hex digits.</p>
     *
     * @return string
     *                <p>The prefix followed by the zero-padded hex digits.</p>
     */
    public static function int_to_hex(int $int, string $prefix = 'U+'): string
    {
        $hex_digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $prefix . $hex_digits;
    }
2946
2947
    /**
     * Reports whether the "IntlChar" class exists.
     *
     * @return bool
     *              <strong>true</strong> if the class exists, <strong>false</strong> otherwise
     */
    public static function intlChar_loaded(): bool
    {
        $class_is_available = \class_exists('IntlChar');

        return $class_is_available;
    }
2957
2958
    /**
     * Reports whether the "intl" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise
     */
    public static function intl_loaded(): bool
    {
        $is_loaded = \extension_loaded('intl');

        return $is_loaded;
    }
2968
2969
    /**
     * Deprecated alias for "UTF8::is_ascii()"; forwards to "ASCII::is_ascii()".
     *
     * @param string $str <p>The string to check.</p>
     *
     * @return bool
     *
     * @see UTF8::is_ascii()
     * @deprecated <p>please use "UTF8::is_ascii()"</p>
     */
    public static function isAscii(string $str): bool
    {
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
2983
2984
    /**
     * Deprecated alias that forwards to "UTF8::is_base64()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *
     * @see UTF8::is_base64()
     * @deprecated <p>please use "UTF8::is_base64()"</p>
     */
    public static function isBase64($str): bool
    {
        $is_base64 = self::is_base64($str);

        return $is_base64;
    }
2998
2999
    /**
     * Deprecated alias that forwards to "UTF8::is_binary()".
     *
     * @param mixed $str    <p>The input to check.</p>
     * @param bool  $strict <p>Handed over unchanged to "UTF8::is_binary()".</p>
     *
     * @return bool
     *
     * @see UTF8::is_binary()
     * @deprecated <p>please use "UTF8::is_binary()"</p>
     */
    public static function isBinary($str, $strict = false): bool
    {
        $is_binary = self::is_binary($str, $strict);

        return $is_binary;
    }
3014
3015
    /**
     * Deprecated alias that forwards to "UTF8::is_bom()".
     *
     * @param string $utf8_chr <p>The string to compare against the known BOMs.</p>
     *
     * @return bool
     *
     * @see UTF8::is_bom()
     * @deprecated <p>please use "UTF8::is_bom()"</p>
     */
    public static function isBom(string $utf8_chr): bool
    {
        $is_bom = self::is_bom($utf8_chr);

        return $is_bom;
    }
3029
3030
    /**
     * Deprecated alias that forwards to "UTF8::is_html()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *
     * @see UTF8::is_html()
     * @deprecated <p>please use "UTF8::is_html()"</p>
     */
    public static function isHtml(string $str): bool
    {
        $is_html = self::is_html($str);

        return $is_html;
    }
3044
3045
    /**
     * Deprecated alias that forwards to "UTF8::is_json()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *
     * @see UTF8::is_json()
     * @deprecated <p>please use "UTF8::is_json()"</p>
     */
    public static function isJson(string $str): bool
    {
        $is_json = self::is_json($str);

        return $is_json;
    }
3059
3060
    /**
     * Deprecated alias that forwards to "UTF8::is_utf16()".
     *
     * @param mixed $str <p>The input string.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     *
     * @see UTF8::is_utf16()
     * @deprecated <p>please use "UTF8::is_utf16()"</p>
     */
    public static function isUtf16($str)
    {
        $utf16_type = self::is_utf16($str);

        return $utf16_type;
    }
3077
3078
    /**
     * Deprecated alias that forwards to "UTF8::is_utf32()".
     *
     * @param mixed $str <p>The input string.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     *
     * @see UTF8::is_utf32()
     * @deprecated <p>please use "UTF8::is_utf32()"</p>
     */
    public static function isUtf32($str)
    {
        $utf32_type = self::is_utf32($str);

        return $utf32_type;
    }
3095
3096
    /**
     * Deprecated alias that forwards to "UTF8::is_utf8()".
     *
     * @param string $str    <p>The input to be checked.</p>
     * @param bool   $strict <p>Handed over unchanged to "UTF8::is_utf8()".</p>
     *
     * @return bool
     *
     * @see UTF8::is_utf8()
     * @deprecated <p>please use "UTF8::is_utf8()"</p>
     */
    public static function isUtf8($str, $strict = false): bool
    {
        $is_utf8 = self::is_utf8($str, $strict);

        return $is_utf8;
    }
3111
3112
    /**
     * Tells whether the string consists solely of alphabetic chars.
     *
     * The check is done against the pattern "^[[:alpha:]]*$", via mb_ereg_match()
     * when mbstring support is flagged, otherwise via UTF8::str_matches_pattern().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not $str contains only alphabetic chars.</p>
     */
    public static function is_alpha(string $str): bool
    {
        $alpha_only_pattern = '^[[:alpha:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $alpha_only_pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($alpha_only_pattern, $str);
    }
3129
3130
    /**
     * Tells whether the string consists solely of alphabetic and numeric chars.
     *
     * The check is done against the pattern "^[[:alnum:]]*$", via mb_ereg_match()
     * when mbstring support is flagged, otherwise via UTF8::str_matches_pattern().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not $str contains only alphanumeric chars.</p>
     */
    public static function is_alphanumeric(string $str): bool
    {
        $alnum_only_pattern = '^[[:alnum:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $alnum_only_pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($alnum_only_pattern, $str);
    }
3147
3148
    /**
     * Checks if a string is 7 bit ASCII (delegates to "ASCII::is_ascii()").
     *
     * @param string $str <p>The string to check.</p>
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if it is ASCII<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function is_ascii(string $str): bool
    {
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }
3163
3164
    /**
     * Tells whether the input is a base64 encoded string.
     *
     * The input counts as base64 when strict decoding succeeds and re-encoding
     * the decoded bytes reproduces the input exactly.
     *
     * @param mixed|string $str                   <p>The input string; non-strings are never valid.</p>
     * @param bool         $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p>
     *
     * @return bool whether or not $str is base64 encoded
     */
    public static function is_base64($str, $empty_string_is_valid = false): bool
    {
        if ($str === '' && $empty_string_is_valid === false) {
            return false;
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (!\is_string($str)) {
            return false;
        }

        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // round trip: only canonical base64 survives decode + encode unchanged
        return \base64_encode($decoded) === $str;
    }
3193
3194
    /**
     * Heuristic check whether the input looks like binary data.
     *
     * The input is cast to string and then tested in this order:
     * empty string (never binary), a string made only of "0" / "1" chars,
     * the type reported by UTF8::get_file_type(), the share of NUL bytes
     * (more than 25%) and, in strict mode only, the encoding detected by finfo.
     *
     * @param mixed $input  <p>The value to check (cast to string).</p>
     * @param bool  $strict <p>Also ask ext-fileinfo for the MIME encoding.</p>
     *
     * @throws \RuntimeException if $strict is true and finfo support is flagged as missing
     *
     * @return bool
     */
    public static function is_binary($input, bool $strict = false): bool
    {
        $bytes = (string) $input;
        if ($bytes === '') {
            return false;
        }

        // a textual bit-string such as "01001" counts as binary
        if (\preg_match('~^[01]+$~', $bytes)) {
            return true;
        }

        $file_type = self::get_file_type($bytes);
        if ($file_type['type'] === 'binary') {
            return true;
        }

        $byte_count = \strlen($bytes);
        $null_byte_count = \substr_count($bytes, "\x0", 0, $byte_count);
        if (($null_byte_count / $byte_count) > 0.25) {
            return true;
        }

        if ($strict !== true) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $finfo_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($bytes);

        return $finfo_encoding === 'binary';
    }
3238
3239
    /**
     * Check if the file is binary, judged by (up to) its first 512 bytes.
     *
     * The bytes read are handed to UTF8::is_binary() in strict mode. If the file
     * could not be opened, or nothing was read, the result is false.
     *
     * @param string $file <p>Path handed to fopen().</p>
     *
     * @return bool
     */
    public static function is_binary_file($file): bool
    {
        $first_bytes = '';

        $handle = \fopen($file, 'rb');
        if (\is_resource($handle)) {
            $first_bytes = \fread($handle, 512);
            \fclose($handle);
        }

        return $first_bytes !== '' && self::is_binary($first_bytes, true);
    }
3263
3264
    /**
     * Tells whether the string consists solely of whitespace chars.
     *
     * The check is done against the pattern "^[[:space:]]*$", via mb_ereg_match()
     * when mbstring support is flagged, otherwise via UTF8::str_matches_pattern().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not $str contains only whitespace characters.</p>
     */
    public static function is_blank(string $str): bool
    {
        $whitespace_only_pattern = '^[[:space:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $whitespace_only_pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($whitespace_only_pattern, $str);
    }
3281
3282
    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * The input is compared strictly (===) against every key of self::$BOM,
     * so only an exact match of the whole string counts.
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
     */
    public static function is_bom($str): bool
    {
        // Compare against the keys only. Iterating the static array by reference
        // (as done before) is unnecessary and leaves a reference behind.
        return \in_array($str, \array_keys(self::$BOM), true);
    }
3303
3304
    /**
     * Determine whether the given value is considered to be empty.
     *
     * This is a thin wrapper around PHP's empty(): a value is empty
     * if it equals FALSE when converted to bool.
     *
     * @param mixed $str
     *
     * @return bool whether or not $str is empty()
     */
    public static function is_empty($str): bool
    {
        $is_empty = empty($str);

        return $is_empty;
    }
3318
3319
    /**
     * Tells whether the string consists solely of hexadecimal chars.
     *
     * The check is done against the pattern "^[[:xdigit:]]*$", via mb_ereg_match()
     * when mbstring support is flagged, otherwise via UTF8::str_matches_pattern().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not $str contains only hexadecimal chars.</p>
     */
    public static function is_hexadecimal(string $str): bool
    {
        $hex_only_pattern = '^[[:xdigit:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $hex_only_pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($hex_only_pattern, $str);
    }
3336
3337
    /**
     * Check if the string contains any HTML tags.
     *
     * The string is first passed through UTF8::emoji_encode() and then searched
     * for the first thing that looks like an opening, closing or self-closing tag.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not $str contains html elements.</p>
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        $html_tag_pattern = "/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u";

        $encoded = self::emoji_encode($str); // hack for emoji support :/

        $found = [];
        \preg_match($html_tag_pattern, $encoded, $found);

        return $found !== [];
    }
3360
3361
    /**
     * Try to check if "$str" is a JSON-string.
     *
     * The string is decoded via UTF8::json_decode(); a NULL result is only
     * accepted when the input itself is the literal "null" (case-insensitive).
     *
     * @param string $str                                    <p>The input string.</p>
     * @param bool   $only_array_or_object_results_are_valid [optional] <p>Only array and objects are valid json results.</p>
     *
     * @throws \RuntimeException if json support is flagged as missing
     *
     * @return bool
     *              <p>Whether or not the $str is in JSON format.</p>
     */
    public static function is_json(
        string $str,
        $only_array_or_object_results_are_valid = true
    ): bool {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        $is_literal_null = \strtoupper($str) === 'NULL';
        if ($decoded === null && !$is_literal_null) {
            return false;
        }

        $is_array_or_object = \is_object($decoded) || \is_array($decoded);
        if (
            $only_array_or_object_results_are_valid === true
            &&
            !$is_array_or_object
        ) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3400
3401
    /**
     * Tells whether the string consists solely of lowercase chars.
     *
     * The check is done against the pattern "^[[:lower:]]*$", via mb_ereg_match()
     * when mbstring support is flagged, otherwise via UTF8::str_matches_pattern().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not $str contains only lowercase chars.</p>
     */
    public static function is_lowercase(string $str): bool
    {
        $lowercase_only_pattern = '^[[:lower:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $lowercase_only_pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($lowercase_only_pattern, $str);
    }
3416
3417
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The check works by actually unserializing the string. Object instantiation
     * is disabled ("allowed_classes" => false), so untrusted input cannot trigger
     * magic methods of application classes; serialized objects are still detected.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not $str is serialized.</p>
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // "b:0;" unserializes to false, which is indistinguishable from the
        // failure value of unserialize(), so it is handled explicitly.
        if ($str === 'b:0;') {
            return true;
        }

        // NOTE(security): never unserialize untrusted data with classes enabled.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        /** @noinspection UnserializeExploitsInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3437
3438
    /**
     * Tells whether the string consists solely of upper case chars.
     *
     * The check is done against the pattern "^[[:upper:]]*$", via mb_ereg_match()
     * when mbstring support is flagged, otherwise via UTF8::str_matches_pattern().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <p>Whether or not $str contains only upper case characters.</p>
     */
    public static function is_uppercase(string $str): bool
    {
        $uppercase_only_pattern = '^[[:upper:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $uppercase_only_pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($uppercase_only_pattern, $str);
    }
3456
3457
    /**
     * Check if the string is UTF-16.
     *
     * For both byte orders (LE first, then BE) the BOM-stripped input is converted
     * to UTF-8 and round-tripped back. If the round trip is stable, the keys of
     * UTF8::count_chars() for the converted text that are also found in the
     * char list of the input are counted. The byte order with the higher count
     * wins; equal counts mean "not UTF-16".
     *
     * @param mixed $str                       <p>The input string.</p>
     * @param bool  $check_if_string_is_binary <p>Return false right away if UTF8::is_binary() (strict) says the input is not binary.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        if (
            $check_if_string_is_binary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // number of matching chars per candidate byte order
        $matches_per_encoding = ['UTF-16LE' => 0, 'UTF-16BE' => 0];

        foreach (['UTF-16LE', 'UTF-16BE'] as $encoding) {
            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                // conversion is not stable for this byte order
                continue;
            }

            // computed lazily and shared between both byte orders
            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // iterate by value: a function result must not be iterated by reference
            /** @noinspection PhpUnusedLocalVariableInspection */
            foreach (self::count_chars($test3) as $test3char => $test3charCount) {
                if (\in_array($test3char, $str_chars, true) === true) {
                    ++$matches_per_encoding[$encoding];
                }
            }
        }

        $maybe_utf16le = $matches_per_encoding['UTF-16LE'];
        $maybe_utf16be = $matches_per_encoding['UTF-16BE'];

        if ($maybe_utf16be === $maybe_utf16le) {
            return false;
        }

        return $maybe_utf16le > $maybe_utf16be ? 1 : 2;
    }
3537
3538
    /**
     * Check if the string is UTF-32.
     *
     * For both byte orders (LE first, then BE) the BOM-stripped input is converted
     * to UTF-8 and round-tripped back. If the round trip is stable, the keys of
     * UTF8::count_chars() for the converted text that are also found in the
     * char list of the input are counted. The byte order with the higher count
     * wins; equal counts mean "not UTF-32".
     *
     * @param mixed $str                       <p>The input string.</p>
     * @param bool  $check_if_string_is_binary <p>Return false right away if UTF8::is_binary() (strict) says the input is not binary.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        if (
            $check_if_string_is_binary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // number of matching chars per candidate byte order
        $matches_per_encoding = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

        foreach (['UTF-32LE', 'UTF-32BE'] as $encoding) {
            $test = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$test) {
                continue;
            }

            $test2 = \mb_convert_encoding($test, $encoding, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $encoding);
            if ($test3 !== $test) {
                // conversion is not stable for this byte order
                continue;
            }

            // computed lazily and shared between both byte orders
            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // iterate by value: a function result must not be iterated by reference
            /** @noinspection PhpUnusedLocalVariableInspection */
            foreach (self::count_chars($test3) as $test3char => $test3charCount) {
                if (\in_array($test3char, $str_chars, true) === true) {
                    ++$matches_per_encoding[$encoding];
                }
            }
        }

        $maybe_utf32le = $matches_per_encoding['UTF-32LE'];
        $maybe_utf32be = $matches_per_encoding['UTF-32BE'];

        if ($maybe_utf32be === $maybe_utf32le) {
            return false;
        }

        return $maybe_utf32le > $maybe_utf32be ? 1 : 2;
    }
3618
3619
    /**
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
     *
     * Arrays are checked element by element (nested arrays recursively); any other
     * input is cast to a string and handed to "UTF8::is_utf8_string()".
     *
     * @param int|string|string[]|null $str    <p>The input to be checked.</p>
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     *              <p><strong>true</strong> if every checked value passes (also for an empty array),
     *              <strong>false</strong> as soon as one value fails.</p>
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str) === true) {
            // Iterate by value: nothing is modified here, so a by-reference loop would only
            // force a separation of the array and leave a dangling reference behind.
            foreach ($str as $v) {
                if (self::is_utf8($v, $strict) === false) {
                    return false;
                }
            }

            return true;
        }

        return self::is_utf8_string((string) $str, $strict);
    }
3641
3642
    /**
     * Decodes a JSON string via the native "json_decode()".
     *
     * The input is passed through "UTF8::filter()" before it is decoded.
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>The <i>json</i> string being decoded (expected to be UTF-8 encoded).</p>
     * @param bool   $assoc   [optional] <p>When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.</p>
     * @param int    $depth   [optional] <p>User specified recursion depth.</p>
     * @param int    $options [optional] <p>Bitmask of JSON decode options, forwarded unchanged
     *                        (e.g. <b>JSON_BIGINT_AS_STRING</b>).</p>
     *
     * @throws \RuntimeException if the json support flag of this class is false
     *
     * @return mixed
     *               <p>The value encoded in <i>json</i> in the appropriate PHP type; <b>NULL</b> is returned
     *               if the <i>json</i> cannot be decoded or if the encoded data is deeper than the
     *               recursion limit.</p>
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        $filtered_json = self::filter($json);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_decode($filtered_json, $assoc, $depth, $options);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
3692
3693
    /**
     * Returns the JSON representation of a value via the native "json_encode()".
     *
     * The value is passed through "UTF8::filter()" before it is encoded.
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>The <i>value</i> being encoded. Can be any type except a resource;
     *                       all string data is expected to be UTF-8 encoded.</p>
     * @param int   $options [optional] <p>Bitmask of the JSON encode constants (<b>JSON_HEX_QUOT</b>,
     *                       <b>JSON_HEX_TAG</b>, <b>JSON_HEX_AMP</b>, <b>JSON_HEX_APOS</b>,
     *                       <b>JSON_NUMERIC_CHECK</b>, <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_SLASHES</b>, <b>JSON_FORCE_OBJECT</b>,
     *                       <b>JSON_UNESCAPED_UNICODE</b>), forwarded unchanged.</p>
     * @param int   $depth   [optional] <p>Set the maximum depth. Must be greater than zero.</p>
     *
     * @throws \RuntimeException if the json support flag of this class is false
     *
     * @return false|string
     *                      <p>A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure.</p>
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filtered_value = self::filter($value);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_encode($filtered_value, $options, $depth);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
3742
3743
    /**
     * Checks whether JSON is available on the server.
     *
     * The check looks for the native "json_decode()" function.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function json_loaded(): bool
    {
        $has_json_decode = \function_exists('json_decode');

        return $has_json_decode;
    }
3753
3754
    /**
     * Makes string's first char lowercase.
     *
     * For UTF-8 input without a language and without the "keep string length" option the
     * native "mb_strtolower()" is used for the first char; every other combination is
     * delegated to "UTF8::strtolower()".
     *
     * @param string      $str                           <p>The input string</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $tail = (string) self::substr($str, 1, null, $encoding);
            $head = self::strtolower(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );

            return $head . $tail;
        }

        $tail = (string) \mb_substr($str, 1);
        $first_char = (string) \mb_substr($str, 0, 1);

        // the plain mb_* call is only used when no special-case handling was requested
        if ($lang === null && $try_to_keep_the_string_length === false) {
            $head = \mb_strtolower($first_char);
        } else {
            $head = self::strtolower(
                $first_char,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );
        }

        return $head . $tail;
    }
3810
3811
    /**
     * Alias for "UTF8::lcfirst()": all arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @return string
     *
     * @see UTF8::lcfirst()
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $result = self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
3840
3841
    /**
     * Lowercase the first char of all words in the string.
     *
     * The string is split via "UTF8::str_to_words()"; every non-empty piece that is not listed
     * in $exceptions is passed through "UTF8::lcfirst()" and the pieces are concatenated again.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string[]    $exceptions                    [optional] <p>Exclusion for some words.</p>
     * @param string      $char_list                     [optional] <p>Additional chars that belong to words and do not
     *                                                   start a new word.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Compare against '' explicitly: a truthiness check would also discard the string "0".
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        // by-value iteration: the pieces are only read, never modified
        foreach ($words as $word) {
            // skip empty pieces only; "0" is a regular word and must be kept
            if ($word === '') {
                continue;
            }

            if ($use_exceptions === true && \in_array($word, $exceptions, true)) {
                $words_str .= $word;

                continue;
            }

            $words_str .= self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        return $words_str;
    }
3890
3891
    /**
     * Alias for "UTF8::lcfirst()": all arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @return string
     *
     * @see UTF8::lcfirst()
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $result = self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
3920
3921
    /**
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
     *
     * With mbstring support the replacement is done via "mb_ereg_replace()", otherwise via
     * "UTF8::regex_replace()".
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped; null or an empty string
     *                           strips whitespace.</p>
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // "0" is a valid character list, so test for null / '' instead of relying on truthiness.
        $has_chars = $chars !== null && $chars !== '';

        if (self::$SUPPORT['mbstring'] === true) {
            if ($has_chars === true) {
                /** @noinspection PregQuoteUsageInspection */
                $pattern = '^[' . \preg_quote($chars) . ']+';
            } else {
                $pattern = '^[\\s]+';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        if ($has_chars === true) {
            // the pattern is wrapped in "/" delimiters below, so the delimiter is quoted too
            $pattern = '^[' . \preg_quote($chars, '/') . ']+';
        } else {
            $pattern = '^[\\s]+';
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
3957
3958
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * An array is joined into one string first; the code points come from "UTF8::codepoints()".
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @return string|null the character with the highest code point, null if no code points were found
     */
    public static function max($arg)
    {
        $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input, false);

        return $codepoints === [] ? null : self::chr(\max($codepoints));
    }
3980
3981
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * The per-character sizes come from "UTF8::chr_size_list()".
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @return int
     *             <p>Max byte length of the given chars, 0 if the size list is empty.</p>
     */
    public static function max_chr_width(string $str): int
    {
        $byte_sizes = self::chr_size_list($str);

        return $byte_sizes === [] ? 0 : (int) \max($byte_sizes);
    }
3999
4000
    /**
     * Checks whether mbstring is available on the server.
     *
     * @return bool
     *              <strong>true</strong> if the "mbstring" extension is loaded, <strong>false</strong> otherwise
     */
    public static function mbstring_loaded(): bool
    {
        $is_loaded = \extension_loaded('mbstring');

        return $is_loaded;
    }
4010
4011
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * An array is joined into one string first; the code points come from "UTF8::codepoints()".
     *
     * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @return string|null the character with the lowest code point, null if no code points were found
     */
    public static function min($arg)
    {
        $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input, false);

        return $codepoints === [] ? null : self::chr(\min($codepoints));
    }
4033
4034
    /**
     * Alias for "UTF8::normalize_encoding()": both arguments are forwarded unchanged.
     *
     * @param mixed $encoding
     * @param mixed $fallback
     *
     * @return mixed
     *
     * @see UTF8::normalize_encoding()
     * @deprecated <p>please use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4049
4050
    /**
     * Normalize the encoding-"name" input.
     *
     * Lookup order: a few hard-coded fast paths, the per-request static cache, an exact match
     * against the known-encodings list, and finally an alias table that is keyed by the
     * upper-cased name with all non-alphanumeric chars removed. Unknown names are returned
     * upper-cased.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        // init
        $encoding = (string) $encoding;

        // '' and '0' are both falsy, so e.g. a false / 0 from non "strict_types" usage ends here
        if (!$encoding) {
            return $fallback;
        }

        if ($encoding === 'UTF-8' || $encoding === 'UTF8') {
            return 'UTF-8';
        }

        if ($encoding === '8BIT' || $encoding === 'BINARY') {
            return 'CP850';
        }

        if ($encoding === 'HTML' || $encoding === 'HTML-ENTITIES') {
            return 'HTML-ENTITIES';
        }

        if ($encoding === 'ISO' || $encoding === 'ISO-8859-1') {
            return 'ISO-8859-1';
        }

        // only a fallback, for non "strict_types" usage ...
        if ($encoding === '1') {
            return $fallback;
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        $encoding_original = $encoding;
        $encoding = \strtoupper($encoding);
        // alias lookup key: upper-cased name without any separator chars
        $encoding_upper_helper = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $encoding);

        $equivalences = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        if (!empty($equivalences[$encoding_upper_helper])) {
            $encoding = $equivalences[$encoding_upper_helper];
        }

        // cached under the name exactly as it was passed in (after the string cast)
        $STATIC_NORMALIZE_ENCODING_CACHE[$encoding_original] = $encoding;

        return $encoding;
    }
4201
4202
    /**
     * Standardize line endings: every "\r\n", "\r" and "\n" is replaced by $replacer.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $replacer <p>The replacer char e.g. "\n" (Linux) or "\r\n" (Windows). You can also use \PHP_EOL here.</p>
     *
     * @return string
     *                <p>A string with normalized line ending.</p>
     */
    public static function normalize_line_ending(string $str, $replacer = "\n"): string
    {
        if (\is_array($replacer) === true) {
            // undocumented legacy form: keep the positional semantics of str_replace()
            return \str_replace(["\r\n", "\r", "\n"], $replacer, $str);
        }

        $replacer = (string) $replacer;

        // strtr() works in a single pass and prefers the longest match, so "\r\n" counts as one
        // line ending and an inserted replacement (e.g. "\r\n") is never scanned again.
        // A sequential str_replace() would turn "a\r\nb" into "a\r\r\n\r\nb" for "\r\n".
        return \strtr($str, ["\r\n" => $replacer, "\r" => $replacer, "\n" => $replacer]);
    }
4215
4216
    /**
     * Normalize some MS Word special characters.
     *
     * Thin wrapper: the work is delegated to "ASCII::normalize_msword()".
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @return string
     *                <p>A string with normalized characters for commonly used chars in Word documents.</p>
     */
    public static function normalize_msword(string $str): string
    {
        $normalized = ASCII::normalize_msword($str);

        return $normalized;
    }
4228
4229
    /**
     * Normalize the whitespace.
     *
     * Thin wrapper: the work is delegated to "ASCII::normalize_whitespace()".
     *
     * @param string $str                        <p>The string to be normalized.</p>
     * @param bool   $keep_non_breaking_space    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keep_bidi_unicode_controls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                           bidirectional text chars.</p>
     *
     * @return string
     *                <p>A string with normalized whitespace.</p>
     */
    public static function normalize_whitespace(
        string $str,
        bool $keep_non_breaking_space = false,
        bool $keep_bidi_unicode_controls = false
    ): string {
        $normalized = ASCII::normalize_whitespace($str, $keep_non_breaking_space, $keep_bidi_unicode_controls);

        return $normalized;
    }
4251
4252
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * Resolution order: static result cache, the "ord" lookup table, "IntlChar::ord()"
     * (if supported) and finally a manual decode of the first (up to four) bytes.
     *
     * @param string $chr      <p>The character of which to calculate code point.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <p>Unicode code point of the given character,<br>
     *             0 on invalid UTF-8 byte sequence</p>
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        static $CHAR_CACHE = [];

        // init
        $chr = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // results are cached per input + encoding name
        $cache_key = $chr . $encoding;
        if (isset($CHAR_CACHE[$cache_key]) === true) {
            return $CHAR_CACHE[$cache_key];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            return $CHAR_CACHE[$cache_key] = self::$ORD[$chr];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_code = \IntlChar::ord($chr);
            if ($intl_code) {
                return $CHAR_CACHE[$cache_key] = $intl_code;
            }
        }

        //
        // fallback via vanilla php
        //

        // unpack() yields a 1-based list of the byte values
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        /** @noinspection OffsetOperationsInspection */
        $lead_byte = $bytes ? $bytes[1] : 0;

        /** @noinspection OffsetOperationsInspection */
        if ($lead_byte >= 0xF0 && isset($bytes[4])) {
            // four byte sequence
            $code = (int) ((($lead_byte - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        } elseif ($lead_byte >= 0xE0 && isset($bytes[3])) {
            // three byte sequence
            $code = (int) ((($lead_byte - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        } elseif ($lead_byte >= 0xC0 && isset($bytes[2])) {
            // two byte sequence
            $code = (int) ((($lead_byte - 0xC0) << 6) + $bytes[2] - 0x80);
        } else {
            // single byte (or a truncated sequence): use the first byte as it is
            $code = $lead_byte;
        }

        return $CHAR_CACHE[$cache_key] = $code;
    }
4337
4338
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
     *          if the second parameter is not set!
     *
     * With mbstring support "mb_parse_str()" is used, otherwise the native "parse_str()".
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $result     <p>The result will be returned into this reference parameter.</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return bool
     *              <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
     */
    public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
    {
        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        if (self::$SUPPORT['mbstring'] !== true) {
            /** @noinspection PhpVoidFunctionResultUsedInspection */
            \parse_str($str, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($str, $result);

        return $parsed !== false && $result !== [];
    }
4370
4371
    /**
     * Checks if the "u" pattern modifier, which enables Unicode support in PCRE, is available.
     *
     * The check runs an empty pattern with the modifier against an empty string and
     * suppresses any warning.
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function pcre_utf8_support(): bool
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $match_result = @\preg_match('//u', '');

        return (bool) $match_result;
    }
4385
4386
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * @param mixed     $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param mixed     $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     * @param bool      $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple "is_numeric"</p>
     * @param string    $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param float|int $step      [optional] <p>
     *                             If a step value is given, it will be used as the
     *                             increment between elements in the sequence. step
     *                             should be given as a positive number. If not specified,
     *                             step will default to 1.
     *                             </p>
     *
     * @throws \InvalidArgumentException if $step is not numeric or not greater than zero
     * @throws \RuntimeException         if $use_ctype is true but ext-ctype is not available
     *
     * @return string[]
     *                  <p>One character per code point in the range; an empty array if a
     *                  boundary is falsy or resolves to a falsy code point.</p>
     */
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            /**
             * @psalm-suppress RedundantConditionGivenDocblockType
             */
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            /**
             * @psalm-suppress RedundantConditionGivenDocblockType - false-positive from psalm?
             */
            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // The ctype_*() functions must only ever see strings: an integer
        // argument is interpreted as an ASCII ordinal (and is deprecated since
        // PHP 8.1), so cast once and reuse the string form for every check.
        $var1_string = (string) $var1;
        $var2_string = (string) $var2;

        $is_digit = false;
        $is_xdigit = false;

        /** @noinspection PhpComposerExtensionStubsInspection */
        if ($use_ctype && \ctype_digit($var1_string) && \ctype_digit($var2_string)) {
            $is_digit = true;
            $start = (int) $var1;
        } /** @noinspection PhpComposerExtensionStubsInspection */ elseif ($use_ctype && \ctype_xdigit($var1_string) && \ctype_xdigit($var2_string)) {
            $is_xdigit = true;
            $start = (int) self::hex_to_int($var1_string);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            // Neither decimal nor hexadecimal: treat the value as a character.
            $start = self::ord($var1_string);
        }

        if (!$start) {
            return [];
        }

        // The end boundary is interpreted the same way as the start boundary.
        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int($var2_string);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord($var2_string);
        }

        if (!$end) {
            return [];
        }

        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
4474
4475
    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @return string
     *                <p>The decoded URL, as a string.</p>
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // Fast path: without "&", "%", "+" or "\u" there is nothing to decode.
        $needs_decoding = false;
        foreach (['&', '%', '+', '\u'] as $marker) {
            if (\strpos($str, $marker) !== false) {
                $needs_decoding = true;

                break;
            }
        }

        if (!$needs_decoding) {
            return self::fix_simple_utf8($str);
        }

        $decoded = self::urldecode_unicode_helper($str);

        // One pass always runs; further passes only in multi-decode mode and
        // only while the previous pass still changed the string.
        do {
            $previous = $decoded;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $decoded = self::fix_simple_utf8(
                \rawurldecode(
                    self::html_entity_decode(
                        self::to_utf8($previous),
                        \ENT_QUOTES | \ENT_HTML5
                    )
                )
            );
        } while ($multi_decode && $previous !== $decoded);

        return $decoded;
    }
4547
4548
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The "u" modifier is always added to the assembled expression.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
     * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // "msr" is accepted and mapped to "ms".
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback for a falsy delimiter
        $wrap = $delimiter ?: '/';

        $regex = $wrap . $pattern . $wrap . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
4581
4582
    /**
     * Deprecated alias for "UTF8::remove_bom()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *                <p>Whatever "UTF8::remove_bom()" returns for $str.</p>
     *
     * @see UTF8::remove_bom()
     * @deprecated <p>please use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $without_bom = self::remove_bom($str);

        return $without_bom;
    }
4596
4597
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of self::$BOM is tested in turn against the start of the
     * (possibly already shortened) string and cut off when it matches.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *                <p>A string without UTF-BOM.</p>
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remaining_bytes = \strlen($str);

        foreach (self::$BOM as $bom => $bom_size) {
            // Only a BOM at offset 0 counts.
            if (\strpos($str, $bom, 0) !== 0) {
                continue;
            }

            /** @var false|string $stripped - needed for PhpStan (stubs error) */
            $stripped = \substr($str, $bom_size, $remaining_bytes);
            if ($stripped === false) {
                return '';
            }

            $remaining_bytes -= (int) $bom_size;
            $str = (string) $stripped;
        }

        return $str;
    }
4628
4629
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Each run of directly repeated $what is collapsed into a single occurrence.
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @return string
     *                <p>A string with removed duplicates.</p>
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what) === true) {
            $what = [$what];
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (\is_array($what) === true) {
            foreach ($what as $item) {
                $item = (string) $item;

                // An empty item cannot be duplicated; nothing to collapse.
                if ($item === '') {
                    continue;
                }

                // Replace with the captured group instead of the raw item:
                // the item may contain "$0", "\1", ... which preg_replace()
                // would otherwise interpret as back-references.
                $str = (string) \preg_replace('/(' . \preg_quote($item, '/') . ')+/u', '$1', $str);
            }
        }

        return $str;
    }
4655
4656
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $allowable_tags [optional] <p>Tags which should not be stripped, in the
     *                               format accepted by "strip_tags()". Default: '' (strip everything)
     *                               </p>
     *
     * @return string
     *                <p>A string without html tags.</p>
     */
    public static function remove_html(string $str, string $allowable_tags = ''): string
    {
        $plain_text = \strip_tags($str, $allowable_tags);

        return $plain_text;
    }
4671
4672
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * A "\r\n" pair counts as ONE break, so it is replaced exactly once.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @return string
     *                <p>A string without breaks.</p>
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // No "/" in front of "\r\n": with "#" as delimiter a slash would be a
        // literal, so CRLF would only match after a slash (which got removed
        // too) and a plain CRLF would be replaced twice via "\r" and "\n".
        return (string) \preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
    }
4685
4686
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * The work is delegated to "ASCII::remove_invisible_characters()".
     *
     * @param string $str         <p>The input string.</p>
     * @param bool   $url_encoded [optional] <p>
     *                            Try to remove url encoded control character.
     *                            WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
     *                            <br>
     *                            Default: false
     *                            </p>
     * @param string $replacement [optional] <p>The replacement character.</p>
     *
     * @return string
     *                <p>A string without invisible chars.</p>
     */
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = false,
        string $replacement = ''
    ): string {
        $cleaned = ASCII::remove_invisible_characters($str, $url_encoded, $replacement);

        return $cleaned;
    }
4716
4717
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>A string without the prefix $substring.</p>
     */
    public static function remove_left(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Compare against '' explicitly: a truthiness check would treat the
        // prefix "0" as "no prefix given". The byte-wise comparison only
        // inspects the first strlen($substring) bytes of $str.
        if ($substring === '' || \strncmp($str, $substring, \strlen($substring)) !== 0) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
4752
4753
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>A string having a $str without the suffix $substring.</p>
     */
    public static function remove_right(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Compare against '' explicitly: a truthiness check would treat the
        // suffix "0" as "no suffix given".
        if ($substring === '' || \substr($str, -\strlen($substring)) !== $substring) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
4789
4790
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $search         <p>The needle to search for.</p>
     * @param string $replacement    <p>The string to replace with.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $case_sensitive = true
    ): string {
        // Case-sensitive: native str_replace(); otherwise the class' own str_ireplace().
        return $case_sensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
4813
4814
    /**
     * Replaces all occurrences of every $search element in $str by $replacement.
     *
     * @param string       $str            <p>The input string.</p>
     * @param array        $search         <p>The elements to search for.</p>
     * @param array|string $replacement    <p>The string (or the per-element strings) to replace with.</p>
     * @param bool         $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $case_sensitive = true
    ): string {
        if (!$case_sensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
4837
4838
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * @param string $str                        <p>The input string</p>
     * @param string $replacement_char           <p>The replacement character.</p>
     * @param bool   $process_invalid_utf8_chars <p>Convert invalid UTF-8 chars </p>
     *
     * @return string
     *                <p>A string without diamond question marks (�).</p>
     */
    public static function replace_diamond_question_mark(
        string $str,
        string $replacement_char = '',
        bool $process_invalid_utf8_chars = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($process_invalid_utf8_chars === true) {
            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            // mb_substitute_character() only accepts a code point or one of
            // the keywords "none" / "long" / "entity"; an arbitrary string is
            // rejected (ValueError on PHP 8). So invalid sequences are first
            // turned into U+FFFD, which the str_replace() below maps to the
            // requested replacement.
            $substitute = $replacement_char === '' ? 'none' : 0xFFFD;

            $save = \mb_substitute_character();
            \mb_substitute_character($substitute);
            // the polyfill maybe return false, so cast to string
            $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            // restore the previous global setting
            \mb_substitute_character($save);
        }

        return \str_replace(
            [
                "\xEF\xBF\xBD",
                '�',
            ],
            $replacement_char,
            $str
        );
    }
4888
4889
    /**
     * Strip whitespace or other characters from the end of a UTF-8 string.
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped; null or '' means whitespace.</p>
     *
     * @return string
     *                <p>A string with unwanted characters stripped from the right.</p>
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        $use_mbstring = self::$SUPPORT['mbstring'] === true;

        // Explicit comparison instead of a truthiness check, so that the
        // character list "0" is honoured instead of silently ignored.
        if ($chars !== null && $chars !== '') {
            // mb_ereg patterns have no delimiter; the PCRE fallback uses "/".
            /** @noinspection PregQuoteUsageInspection */
            $quoted = $use_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');
            $pattern = '[' . $quoted . ']+$';
        } else {
            $pattern = '[\\s]+$';
        }

        if ($use_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
4926
4927
    /**
     * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
     *
     * Echoes every entry of self::$SUPPORT as "key - value" inside a "<pre>" block.
     *
     * @psalm-suppress MissingReturnType
     */
    public static function showSupport()
    {
        $output = '<pre>';
        foreach (self::$SUPPORT as $key => $value) {
            $output .= $key . ' - ' . \print_r($value, true) . "\n<br>";
        }
        $output .= '</pre>';

        echo $output;
    }
4941
4942
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The HTML numbered entity for the given character.</p>
     */
    public static function single_chr_html_encode(
        string $char,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($char === '') {
            return '';
        }

        // The ASCII check is only performed when ASCII chars should be kept.
        $return_unchanged = $keep_ascii_chars === true && ASCII::is_ascii($char) === true;
        if ($return_unchanged) {
            return $char;
        }

        $code_point = self::ord($char, $encoding);

        return '&#' . $code_point . ';';
    }
4971
4972
    /**
     * Replaces every run of $tab_length spaces with one tab character.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length [optional] <p>Number of spaces that make up one tab. Default: 4</p>
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tab_length = 4): string
    {
        $spaces = \str_repeat(' ', $tab_length);

        return \str_replace($spaces, "\t", $str);
    }
4990
4991
    /**
     * Deprecated alias for "UTF8::str_split()".
     *
     * @param string|string[] $str
     * @param int             $length
     * @param bool            $clean_utf8
     *
     * @return string[]
     *
     * @see UTF8::str_split()
     * @deprecated <p>please use "UTF8::str_split()"</p>
     */
    public static function split(
        $str,
        int $length = 1,
        bool $clean_utf8 = false
    ): array {
        $parts = self::str_split($str, $length, $clean_utf8);

        return $parts;
    }
5010
5011
    /**
     * Deprecated alias for "UTF8::str_starts_with()".
     *
     * @param string $haystack
     * @param string $needle
     *
     * @return bool
     *
     * @see UTF8::str_starts_with()
     * @deprecated <p>please use "UTF8::str_starts_with()"</p>
     */
    public static function str_begins(string $haystack, string $needle): bool
    {
        $starts_with = self::str_starts_with($haystack, $needle);

        return $starts_with;
    }
5026
5027
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = self::lcfirst(\trim($str), $encoding, false, $lang, $try_to_keep_the_string_length);
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // The plain mb_* functions are only used when no language specific
        // handling and no length preservation was requested.
        $use_mb_functions = $lang === null && $try_to_keep_the_string_length === false;

        /**
         * Upper-cases $text, either via mb_strtoupper() or via self::strtoupper().
         *
         * @param string $text
         * @param bool   $clean <p>passed on as "clean utf8" flag to self::strtoupper()</p>
         *
         * @return string
         */
        $to_upper = static function (string $text, bool $clean) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
            if ($use_mb_functions !== true) {
                return self::strtoupper($text, $encoding, $clean, $lang, $try_to_keep_the_string_length);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($text);
            }

            return \mb_strtoupper($text, $encoding);
        };

        // Step 1: drop dash / underscore / whitespace runs and upper-case the
        // character that follows them (if there is one).
        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($to_upper): string {
                if (!isset($match[1])) {
                    return '';
                }

                return $to_upper($match[1], false);
            },
            $str
        );

        // Step 2: upper-case every run of numbers together with the character
        // directly after it.
        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($to_upper, $clean_utf8): string {
                return $to_upper($match[0], $clean_utf8);
            },
            $str
        );
    }
5112
5113
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * Whitespace is collapsed first; the helper is then applied with ' ' and
     * afterwards with '-' as delimiter.
     *
     * @param string $str
     *
     * @return string
     *                <p>A string with $str capitalized.</p>
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);
        $by_space = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($by_space, '-');
    }
5132
5133
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $case_sensitive to false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param string $needle         <p>Substring to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool whether or not $haystack contains $needle
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $case_sensitive = true
    ): bool {
        $position = $case_sensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        // Position 0 is a valid hit, so only "false" means "not found".
        return $position !== false;
    }
5155
5156
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * An empty haystack, an empty needle list or an empty needle yields false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool whether or not $haystack contains every one of the $needles
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // Explicit '' check so that the needle "0" is searched for as well.
            if ($needle === '') {
                return false;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            // Bail out on the first miss, but keep looping after a hit:
            // EVERY needle has to be present.
            if ($position === false) {
                return false;
            }
        }

        return true;
    }
5191
5192
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * Empty needles are skipped.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool
     *              Whether or not $haystack contains at least one of the $needles
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // Explicit '' check so that the needle "0" is not skipped.
            if ($needle === '') {
                continue;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
5234
5235
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * Shortcut for "UTF8::str_delimit()" with '-' as delimiter.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $dasherized = self::str_delimit($str, '-', $encoding);

        return $dasherized;
    }
5249
5250
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase.
     *
     * The string is processed in three steps: every inner-word uppercase letter
     * is prefixed with a temporary "-", the string is lowercased, and finally
     * every run of dashes, underscores and whitespace is replaced by $delimiter.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $has_mbstring = self::$SUPPORT['mbstring'] === true;

        // step 1: mark every uppercase letter inside of a word with a leading dash
        $str = \trim($str);
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $str);
        } else {
            $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $str);
        }

        // step 2: lowercase - the plain "mb_strtolower()" shortcut is only valid if
        // none of the special options is requested; "$clean_utf8" is one of them,
        // otherwise the requested cleanup would be skipped silently
        $use_mb_functions = $lang === null
                            &&
                            $try_to_keep_the_string_length === false
                            &&
                            $clean_utf8 === false;
        if ($use_mb_functions === true && $encoding === 'UTF-8') {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        // step 3: collapse every run of dashes, underscores and whitespace into the delimiter
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }
5301
5302
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * The checks run in this order and the first hit wins: binary data
     * (UTF-32 / UTF-16, otherwise false), ASCII, UTF-8, "mb_detect_encoding()"
     * with a fixed candidate list, and finally an "iconv()" round-trip over the
     * encodings from the "encodings" data file.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     */
    public static function str_detect_encoding($str)
    {
        // init
        $str = (string) $str;

        //
        // 1.) binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        if (self::is_binary($str, true) === true) {
            // 1 === little endian, 2 === big endian
            $utf32_type = self::is_utf32($str, false);
            if ($utf32_type === 1 || $utf32_type === 2) {
                return $utf32_type === 1 ? 'UTF-32LE' : 'UTF-32BE';
            }

            $utf16_type = self::is_utf16($str, false);
            if ($utf16_type === 1 || $utf16_type === 2) {
                return $utf16_type === 1 ? 'UTF-16LE' : 'UTF-16BE';
            }

            // is binary but not "UTF-16" or "UTF-32"
            return false;
        }

        //
        // 2.) plain ASCII
        //

        if (ASCII::is_ascii($str) === true) {
            return 'ASCII';
        }

        //
        // 3.) UTF-8
        //

        if (self::is_utf8_string($str) === true) {
            return 'UTF-8';
        }

        //
        // 4.) "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            $candidates = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($str, $candidates, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) "iconv()": an encoding matches if converting the string to itself keeps it unchanged
        //

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $round_trip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);
            if ($round_trip === $str) {
                return $candidate;
            }
        }

        return false;
    }
5423
5424
    /**
     * Deprecated alias of "UTF8::str_ends_with()": forwards both arguments unchanged.
     *
     * @param string $haystack
     * @param string $needle
     *
     * @return bool
     *
     * @see UTF8::str_ends_with()
     * @deprecated <p>please use "UTF8::str_ends_with()"</p>
     */
    public static function str_ends(string $haystack, string $needle): bool
    {
        $ends_with = self::str_ends_with($haystack, $needle);

        return $ends_with;
    }
5439
5440
    /**
     * Check if the string ends with the given substring (case-sensitive).
     *
     * An empty $needle always matches, an empty $haystack only matches an
     * empty $needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        // every string ends with the empty string
        if ($needle === '') {
            return true;
        }

        $needle_length = \strlen($needle);

        return $haystack !== ''
               &&
               \substr($haystack, -$needle_length) === $needle;
    }
5460
5461
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty substring always matches, the same way as in "UTF8::str_ends_with()"
     *   and "UTF8::str_iends_with_any()"
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool
     *              <p>Whether or not $str ends with at least one of the $substrings.</p>
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        foreach ($substrings as $substring) {
            // "substr($str, -0)" would return the whole string, so the empty
            // substring needs its own check to be treated like in "str_ends_with()"
            if ($substring === '') {
                return true;
            }

            if (\substr($str, -\strlen($substring)) === $substring) {
                return true;
            }
        }

        return false;
    }
5485
5486
    /**
     * Makes sure that the string starts with $substring: the string is returned
     * as it is if it already begins with $substring, otherwise $substring is
     * prepended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        $already_prefixed = false;
        if ($substring !== '') {
            $already_prefixed = \strpos($str, $substring) === 0;
        }

        return $already_prefixed ? $str : $substring . $str;
    }
5507
5508
    /**
     * Makes sure that the string ends with $substring: the string is returned
     * as it is if it already ends with $substring, otherwise $substring is
     * appended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        $already_suffixed = $str !== ''
                            &&
                            $substring !== ''
                            &&
                            \substr($str, -\strlen($substring)) === $substring;

        if ($already_suffixed) {
            return $str;
        }

        return $str . $substring;
    }
5530
5531
    /**
     * Turns an identifier-like string into a more readable one: every "_id" is
     * removed, the remaining underscores become spaces, the result is trimmed
     * and passed through "UTF8::ucfirst()".
     *
     * @param string $str
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        // the order matters: "_id" must be stripped before the underscores are replaced
        $without_id = \str_replace('_id', '', $str);
        $spaced = \str_replace('_', ' ', $without_id);

        return self::ucfirst(\trim($spaced));
    }
5555
5556
    /**
     * Deprecated alias of "UTF8::str_istarts_with()": forwards both arguments unchanged.
     *
     * @param string $haystack
     * @param string $needle
     *
     * @return bool
     *
     * @see UTF8::str_istarts_with()
     * @deprecated <p>please use "UTF8::str_istarts_with()"</p>
     */
    public static function str_ibegins(string $haystack, string $needle): bool
    {
        $starts_with = self::str_istarts_with($haystack, $needle);

        return $starts_with;
    }
5571
5572
    /**
     * Deprecated alias of "UTF8::str_iends_with()": forwards both arguments unchanged.
     *
     * @param string $haystack
     * @param string $needle
     *
     * @return bool
     *
     * @see UTF8::str_iends_with()
     * @deprecated <p>please use "UTF8::str_iends_with()"</p>
     */
    public static function str_iends(string $haystack, string $needle): bool
    {
        $ends_with = self::str_iends_with($haystack, $needle);

        return $ends_with;
    }
5587
5588
    /**
     * Check if the string ends with the given substring, case-insensitive.
     *
     * An empty $needle always matches, an empty $haystack only matches an
     * empty $needle. The end of $haystack is cut off by characters (not by
     * bytes), so case variants with a different byte length can still match.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // upper- and lowercase forms of a character may differ in their byte length,
        // so the tail must contain as many characters (not bytes) as the needle
        $tail = (string) \mb_substr($haystack, -((int) \mb_strlen($needle)));

        return self::strcasecmp($tail, $needle) === 0;
    }
5608
5609
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive
     * - every substring is checked via "UTF8::str_iends_with()"
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool
     *              <p>Whether or not $str ends with $substring.</p>
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_iends_with($str, $candidate) === true) {
                return true;
            }
        }

        return false;
    }
5634
5635
    /**
     * Deprecated wrapper around "UTF8::stripos()": all arguments are forwarded
     * unchanged and the result is returned as it is.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see UTF8::stripos()
     * @deprecated <p>please use "UTF8::stripos()"</p>
     */
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::stripos($str, $needle, $offset, $encoding);
    }
5664
5665
    /**
     * Deprecated wrapper around "UTF8::strripos()": all arguments are forwarded
     * unchanged and the result is returned as it is.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see UTF8::strripos()
     * @deprecated <p>please use "UTF8::strripos()"</p>
     */
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strripos($str, $needle, $offset, $encoding);
    }
5695
5696
    /**
     * Deprecated wrapper around "UTF8::strpos()": all arguments are forwarded
     * unchanged and the result is returned as it is.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see UTF8::strpos()
     * @deprecated <p>please use "UTF8::strpos()"</p>
     */
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strpos($str, $needle, $offset, $encoding);
    }
5725
5726
    /**
     * Deprecated wrapper around "UTF8::strrpos()": all arguments are forwarded
     * unchanged and the result is returned as it is.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see UTF8::strrpos()
     * @deprecated <p>please use "UTF8::strrpos()"</p>
     */
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strrpos($str, $needle, $offset, $encoding);
    }
5756
5757
    /**
     * Inserts $substring into the string at the $index provided.
     *
     * The string is returned unchanged if $index is greater than the length of
     * the string. For "UTF-8" the native "mb_*" functions are used directly,
     * every other encoding is normalized first and handled via
     * "UTF8::strlen()" / "UTF8::substr()".
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        // decided before the normalization, so only a literal "UTF-8" takes the fast path
        $is_utf8 = $encoding === 'UTF-8';

        if ($is_utf8) {
            $length = (int) \mb_strlen($str);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $length = (int) self::strlen($str, $encoding);
        }

        // nothing to insert behind the end of the string
        if ($index > $length) {
            return $str;
        }

        if ($is_utf8) {
            $head = (string) \mb_substr($str, 0, $index);
            $tail = (string) \mb_substr($str, $index, $length);
        } else {
            $head = (string) self::substr($str, 0, $index, $encoding);
            $tail = (string) self::substr($str, $index, $length, $encoding);
        }

        return $head . $substring . $tail;
    }
5796
5797
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * Every search value is converted into a quoted, case-insensitive unicode
     * regex and replaced via "preg_replace()". The replacement values are taken
     * literally, like in "str_replace()": "$" and "\" are escaped, so that they
     * are never interpreted as regex back-references.
     *
     * @see http://php.net/manual/en/function.str-ireplace.php
     *
     * @param mixed $search  <p>
     *                       Every replacement with search array is
     *                       performed on the result of previous replacement.
     *                       An empty search value never matches.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value (or values) that replaces the found search values.
     *                       </p>
     * @param mixed $subject <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] <p>
     *                       The number of matched and replaced needles will
     *                       be returned in count which is passed by
     *                       reference.
     *                       </p>
     *
     * @return mixed a string or an array of replacements
     */
    public static function str_ireplace($search, $replace, $subject, &$count = null)
    {
        $patterns = [];
        foreach ((array) $search as $needle) {
            $needle = (string) $needle;
            if ($needle === '') {
                // an empty needle must not match anything: this pattern can never match
                $patterns[] = '/^(?<=.)$/';
            } else {
                $patterns[] = '/' . \preg_quote($needle, '/') . '/ui';
            }
        }

        // "preg_replace()" treats "$n" and "\n" in the replacement as back-references,
        // but "str_replace()"-semantic means that the replacement is inserted as it is
        $escape = static function ($value): string {
            return \addcslashes((string) $value, '\\$');
        };
        $replacement = \is_array($replace) ? \array_map($escape, $replace) : $escape($replace);

        return \preg_replace($patterns, $replacement, $subject, -1, $count);
    }
5841
5842
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * - case-insensitive, also for non-ASCII characters
     * - if $search is empty, then $replacement is appended to the string
     *   (long-standing behavior, kept for backward compatibility)
     * - if the string does not start with $search, then it is returned unchanged
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            return $str . $replacement;
        }

        if ($str === '') {
            return '';
        }

        // "stripos()" only folds ASCII characters, so the multi-byte version is needed
        // to match e.g. "Ä" against "ä" - the same way as "str_istarts_with()" does
        if (\mb_stripos($str, $search) === 0) {
            return $replacement . (string) \mb_substr($str, (int) \mb_strlen($search));
        }

        return $str;
    }
5873
5874
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * - case-insensitive, also for non-ASCII characters
     * - if $search is empty, then $replacement is appended to the string
     * - if the string does not end with $search (or is shorter than $search),
     *   then it is returned unchanged
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string
     *                <p>string after the replacements.</p>
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            return $str . $replacement;
        }

        $str_length = (int) \mb_strlen($str);
        $search_length = (int) \mb_strlen($search);

        // a longer needle can never match - and it would result in an offset outside
        // of the string, which triggers a warning (PHP 7) or a "ValueError" (PHP 8)
        if ($search_length > $str_length) {
            return $str;
        }

        // the needle matches the ending only if it is found exactly at this position
        $offset = $str_length - $search_length;
        if (\mb_stripos($str, $search, $offset) === $offset) {
            return (string) \mb_substr($str, 0, $offset) . $replacement;
        }

        return $str;
    }
5906
5907
    /**
     * Check if the string starts with the given substring, case-insensitive.
     *
     * An empty $needle always matches, an empty $haystack only matches an
     * empty $needle; everything else is decided via "UTF8::stripos()".
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        // every string starts with the empty string
        if ($needle === '') {
            return true;
        }

        return $haystack !== ''
               &&
               self::stripos($haystack, $needle) === 0;
    }
5927
5928
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive
     * - an empty string or an empty list of substrings never matches
     * - every substring is checked via "UTF8::str_istarts_with()"
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with $substring
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_istarts_with($str, $candidate) === true) {
                return true;
            }
        }

        return false;
    }
5956
5957
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * - the separator is searched case-insensitive
     * - an empty string is returned if $str or $separator is empty, or if the
     *   separator was not found
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // search with the same encoding that is used for cutting the string afterwards,
        // otherwise the offset does not fit for non UTF-8 input
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
5994
5995
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * - the separator is searched case-insensitive
     * - an empty string is returned if $str or $separator is empty, or if the
     *   separator was not found
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // search with the same encoding that is used for cutting the string afterwards,
        // otherwise the offset does not fit for non UTF-8 input
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6032
6033
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * - the separator is searched case-insensitive
     * - an empty string is returned if $str or $separator is empty, or if the
     *   separator was not found
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // search with the same encoding that is used for cutting the string afterwards,
        // otherwise the offset does not fit for non UTF-8 input
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6062
6063
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * - the separator is searched case-insensitive
     * - an empty string is returned if $str or $separator is empty, or if the
     *   separator was not found
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // "UTF-8" is handled by the native functions, everything else by the wrappers
        $offset = $is_utf8
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);

        if ($offset === false) {
            return '';
        }

        if ($is_utf8) {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6097
6098
    /**
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
     *
     * The work is done by "UTF8::stristr()"; an empty string is returned if
     * $str or $needle is empty, or if "UTF8::stristr()" returns false.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::stristr($str, $needle, $before_needle, $encoding);

        return $found === false ? '' : $found;
    }
6134
6135
    /**
6136
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
6137
     *
6138
     * @param string $str           <p>The input string.</p>
6139
     * @param string $needle        <p>The string to look for.</p>
6140
     * @param bool   $before_needle [optional] <p>Default: false</p>
6141
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
6142
     *
6143
     * @return string
6144
     */
6145 1
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        // With an empty haystack or an empty needle there is nothing to look up.
        if ($str === '' || $needle === '') {
            return '';
        }

        // The actual search is delegated to self::strrichr().
        $found = self::strrichr($str, $needle, $before_needle, $encoding);

        // self::strrichr() answers false when it has no result; callers of this
        // method always get a string, so map that to ''.
        return $found === false ? '' : $found;
    }
6171
6172
    /**
6173
     * Returns the last $n characters of the string.
6174
     *
6175
     * @param string $str      <p>The input string.</p>
6176
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
6177
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6178
     *
6179
     * @return string
6180
     */
6181 12
    public static function str_last_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // An empty input or a non-positive count yields an empty result.
        if ($n <= 0 || $str === '') {
            return '';
        }

        // A negative start offset counts backwards from the end of the string.
        $start = -$n;

        if ($encoding !== 'UTF-8') {
            $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, $start, null, $normalized_encoding);
        }

        // Exact 'UTF-8' takes the direct mbstring path.
        return (string) \mb_substr($str, $start);
    }
6198
6199
    /**
6200
     * Limit the number of characters in a string.
6201
     *
6202
     * @param string $str        <p>The input string.</p>
6203
     * @param int    $length     [optional] <p>Default: 100</p>
6204
     * @param string $str_add_on [optional] <p>Default: …</p>
6205
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
6206
     *
6207
     * @return string
6208
     */
6209 2
    public static function str_limit(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to return for an empty input or a non-positive limit.
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // Short enough already: returned untouched, without the add-on.
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Room left for the original text once the add-on is accounted for.
            // Clamp at 0: a negative length would make mb_substr() cut from the
            // END of the string and return (almost) the whole input whenever the
            // add-on is longer than $length.
            $keep = $length - (int) self::strlen($str_add_on);
            if ($keep < 0) {
                $keep = 0;
            }

            return ((string) \mb_substr($str, 0, $keep)) . $str_add_on;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // Same clamp as above, for the same reason.
        $keep = $length - (int) self::strlen($str_add_on);
        if ($keep < 0) {
            $keep = 0;
        }

        return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
    }
6236
6237
    /**
6238
     * Limit the number of characters in a string, cutting at the last whole word that fits into the limit.
6239
     *
6240
     * @param string $str        <p>The input string.</p>
6241
     * @param int    $length     [optional] <p>Default: 100</p>
6242
     * @param string $str_add_on [optional] <p>Default: …</p>
6243
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
6244
     *
6245
     * @return string
6246
     */
6247 6
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($length <= 0 || $str === '') {
            return '';
        }

        // Exact 'UTF-8' uses mbstring directly, everything else goes through
        // the class helpers with the encoding passed along.
        $is_utf8 = $encoding === 'UTF-8';

        $count_chars = static function ($text) use ($is_utf8, $encoding): int {
            return (int) ($is_utf8 ? \mb_strlen($text) : self::strlen($text, $encoding));
        };
        $slice = static function ($text, int $from, int $size) use ($is_utf8, $encoding) {
            return $is_utf8
                ? \mb_substr($text, $from, $size)
                : self::substr($text, $from, $size, $encoding);
        };

        // Short enough already: returned untouched, without the add-on.
        if ($count_chars($str) <= $length) {
            return $str;
        }

        // The limit falls exactly behind a word: drop that space and finish.
        if ($slice($str, $length - 1, 1) === ' ') {
            return ((string) $slice($str, 0, $length - 1)) . $str_add_on;
        }

        $head = $slice($str, 0, $length);
        if ($head === false) {
            return '' . $str_add_on;
        }

        // Keep everything in front of the last space, i.e. drop the word that
        // the limit cut into.
        $last_space = \strrpos($head, ' ');
        if ($last_space === false || $last_space === 0) {
            // No complete word fits: fall back to a hard cut one char below the limit.
            return ((string) $slice($head, 0, $length - 1)) . $str_add_on;
        }

        return \substr($head, 0, $last_space) . $str_add_on;
    }
6303
6304
    /**
6305
     * Returns the longest common prefix between the $str1 and $str2.
6306
     *
6307
     * @param string $str1     <p>The input string.</p>
6308
     * @param string $str2     <p>Second string for comparison.</p>
6309
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6310
     *
6311
     * @return string
6312
     */
6313 10
    public static function str_longest_common_prefix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        // Exact 'UTF-8' uses mbstring directly, everything else goes through
        // the class helpers with the normalized encoding.
        $is_utf8 = $encoding === 'UTF-8';

        if ($is_utf8) {
            $limit = (int) \min(\mb_strlen($str1), \mb_strlen($str2));
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $limit = (int) \min(
                self::strlen($str1, $encoding),
                self::strlen($str2, $encoding)
            );
        }

        $char_at = static function (string $text, int $pos) use ($is_utf8, $encoding) {
            return $is_utf8
                ? \mb_substr($text, $pos, 1)
                : self::substr($text, $pos, 1, $encoding);
        };

        // Walk both strings from the front and stop at the first position where
        // they differ (or where a character cannot be read).
        $prefix = '';
        $pos = 0;
        while ($pos < $limit) {
            $char = $char_at($str1, $pos);
            if ($char === false || $char !== $char_at($str2, $pos)) {
                break;
            }

            $prefix .= $char;
            ++$pos;
        }

        return $prefix;
    }
6365
6366
    /**
6367
     * Returns the longest common substring between the $str1 and $str2.
6368
     * In the case of ties, it returns that which occurs first.
6369
     *
6370
     * @param string $str1
6371
     * @param string $str2     <p>Second string for comparison.</p>
6372
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6373
     *
6374
     * @return string
6375
     *                <p>The longest common substring of $str1 and $str2.</p>
6376
     */
6377 11
    public static function str_longest_common_substring(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        // Uses dynamic programming to solve
        // http://en.wikipedia.org/wiki/Longest_common_substring_problem

        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str1);
            $other_length = (int) \mb_strlen($str2);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str_length = (int) self::strlen($str1, $encoding);
            $other_length = (int) self::strlen($str2, $encoding);
        }

        // Return if either string is empty
        if ($str_length === 0 || $other_length === 0) {
            return '';
        }

        // Checked after normalization on purpose: an alias that normalizes to
        // 'UTF-8' takes the mbstring path from here on.
        $is_utf8 = $encoding === 'UTF-8';

        // Extract every character of $str2 exactly once. Doing this inside the
        // nested loop costs one substring call per table cell.
        $other_chars = [];
        for ($j = 0; $j < $other_length; ++$j) {
            $other_chars[$j] = $is_utf8
                ? \mb_substr($str2, $j, 1)
                : self::substr($str2, $j, 1, $encoding);
        }

        // $len: length of the best run found so far.
        // $end: 1-based position in $str1 where that run ends.
        $len = 0;
        $end = 0;

        // Only the previous row of the DP table is ever read, so two rows are
        // enough. Index 0 is the all-zero border column.
        $previous_row = \array_fill(0, $other_length + 1, 0);

        for ($i = 1; $i <= $str_length; ++$i) {
            // The character of $str1 does not change while $j runs.
            $str_char = $is_utf8
                ? \mb_substr($str1, $i - 1, 1)
                : self::substr($str1, $i - 1, 1, $encoding);

            $current_row = [0];
            for ($j = 1; $j <= $other_length; ++$j) {
                if ($str_char === $other_chars[$j - 1]) {
                    $run = $previous_row[$j - 1] + 1;

                    // Strict ">" keeps the earliest run when lengths tie.
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $run = 0;
                }

                $current_row[$j] = $run;
            }

            $previous_row = $current_row;
        }

        if ($is_utf8) {
            return (string) \mb_substr($str1, $end - $len, $len);
        }

        return (string) self::substr($str1, $end - $len, $len, $encoding);
    }
6454
6455
    /**
6456
     * Returns the longest common suffix between the $str1 and $str2.
6457
     *
6458
     * @param string $str1
6459
     * @param string $str2     <p>Second string for comparison.</p>
6460
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6461
     *
6462
     * @return string
6463
     */
6464 10
    public static function str_longest_common_suffix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        // Exact 'UTF-8' uses mbstring directly, everything else goes through
        // the class helpers with the normalized encoding.
        $is_utf8 = $encoding === 'UTF-8';

        if ($is_utf8) {
            $limit = (int) \min(
                \mb_strlen($str1, $encoding),
                \mb_strlen($str2, $encoding)
            );
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $limit = (int) \min(
                self::strlen($str1, $encoding),
                self::strlen($str2, $encoding)
            );
        }

        $char_at = static function (string $text, int $pos) use ($is_utf8, $encoding) {
            return $is_utf8
                ? \mb_substr($text, $pos, 1)
                : self::substr($text, $pos, 1, $encoding);
        };

        // Walk both strings from the back (negative offsets) and stop at the
        // first position where they differ.
        $suffix = '';
        for ($back = 1; $back <= $limit; ++$back) {
            $char = $char_at($str1, -$back);
            if ($char === false || $char !== $char_at($str2, -$back)) {
                break;
            }

            // Characters are discovered last-to-first, so prepend.
            $suffix = $char . $suffix;
        }

        return $suffix;
    }
6519
6520
    /**
6521
     * Returns true if $str matches the supplied pattern, false otherwise.
6522
     *
6523
     * @param string $str     <p>The input string.</p>
6524
     * @param string $pattern <p>Regex pattern to match against.</p>
6525
     *
6526
     * @return bool whether or not $str matches the pattern
6527
     */
6528
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        // The pattern is wrapped in "/" delimiters and gets the "u" modifier,
        // so it is passed without delimiters.
        $regex = '/' . $pattern . '/u';

        // preg_match() yields 1 on a match; 0 (no match) and false (error)
        // both count as "does not match".
        return \preg_match($regex, $str) === 1;
    }
6532
6533
    /**
6534
     * Returns whether or not a character exists at an index. Offsets may be
6535
     * negative to count from the last character in the string. Implements
6536
     * part of the ArrayAccess interface.
6537
     *
6538
     * @param string $str      <p>The input string.</p>
6539
     * @param int    $offset   <p>The index to check.</p>
6540
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6541
     *
6542
     * @return bool whether or not the index exists
6543
     */
6544 6
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $length = (int) self::strlen($str, $encoding);

        // Negative offsets count from the end: -1 is the last character, so
        // the magnitude may be as large as the length itself.
        if ($offset < 0) {
            return \abs($offset) <= $length;
        }

        // Non-negative offsets are zero-based and must stay below the length.
        return $offset < $length;
    }
6555
6556
    /**
6557
     * Returns the character at the given index. Offsets may be negative to
6558
     * count from the last character in the string. Implements part of the
6559
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
6560
     * does not exist.
6561
     *
6562
     * @param string $str      <p>The input string.</p>
6563
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
6564
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6565
     *
6566
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
6567
     *
6568
     * @return string
6569
     *                <p>The character at the specified index.</p>
6570
     */
6571 2
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Measure in the caller's encoding. The lookup below is done with
        // $encoding as well, and the sibling str_offset_exists() measures the
        // same way; a length taken in the default encoding can let a bad index
        // through or reject a good one.
        $length = (int) self::strlen($str, $encoding);

        if (
            // zero-based positive index at or beyond the end
            ($index >= 0 && $length <= $index)
            ||
            // negative index reaching in front of the first character
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6586
6587
    /**
6588
     * Pad a UTF-8 string to a given length with another string.
6589
     *
6590
     * @param string     $str        <p>The input string.</p>
6591
     * @param int        $pad_length <p>The length of return string.</p>
6592
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
6593
     * @param int|string $pad_type   [optional] <p>
6594
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
6595
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
6596
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
6597
     *                               </p>
6598
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
6599
     *
6600
     * @return string
6601
     *                <p>Returns the padded string.</p>
6602
     */
6603 41
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        // Without a target length or without padding material there is nothing to do.
        if ($pad_string === '' || $pad_length === 0) {
            return $str;
        }

        // Anything that is not already an int has to be one of the accepted names.
        if ($pad_type !== (int) $pad_type) {
            $named_types = [
                'left'  => \STR_PAD_LEFT,
                'right' => \STR_PAD_RIGHT,
                'both'  => \STR_PAD_BOTH,
            ];

            if (!\is_string($pad_type) || !isset($named_types[$pad_type])) {
                throw new \InvalidArgumentException(
                    'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }

            $pad_type = $named_types[$pad_type];
        }

        // Exact 'UTF-8' uses mbstring directly, everything else goes through
        // the class helpers with the normalized encoding.
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $measure = static function (string $text) use ($is_utf8, $encoding): int {
            return (int) ($is_utf8 ? \mb_strlen($text) : self::strlen($text, $encoding));
        };
        $take = static function (string $text, int $count) use ($is_utf8, $encoding): string {
            return (string) ($is_utf8
                ? \mb_substr($text, 0, $count)
                : self::substr($text, 0, $count, $encoding));
        };

        $str_length = $measure($str);

        // Already longer than the requested length: returned unchanged.
        if ($pad_length < $str_length) {
            return $str;
        }

        // Number of characters that have to be added in total.
        $diff = $pad_length - $str_length;

        if ($pad_type === \STR_PAD_BOTH) {
            // An odd remainder goes to the right-hand side.
            $left_size = (int) \floor($diff / 2);
            $right_size = (int) \ceil($diff / 2);

            $pre = $take(\str_repeat($pad_string, $left_size), $left_size);
            $post = $take(\str_repeat($pad_string, $right_size), $right_size);

            return $pre . $str . $post;
        }

        // One-sided padding: repeat the pad string often enough to cover $diff
        // characters, then trim the surplus.
        $repeats = (int) \ceil($diff / $measure($pad_string));
        $fill = $take(\str_repeat($pad_string, $repeats), $diff);

        if ($pad_type === \STR_PAD_LEFT) {
            return $fill . $str;
        }

        // STR_PAD_RIGHT and any other int value pad on the right.
        return $str . $fill;
    }
6750
6751
    /**
6752
     * Returns a new string of a given length such that both sides of the
6753
     * string are padded. Alias for "UTF8::str_pad()" with a $pad_type of 'both'.
6754
     *
6755
     * @param string $str
6756
     * @param int    $length   <p>Desired string length after padding.</p>
6757
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
6758
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6759
     *
6760
     * @return string
6761
     *                <p>The string with padding applied.</p>
6762
     */
6763 11
    public static function str_pad_both(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        // Fixed pad type; everything else is forwarded unchanged.
        $pad_type = \STR_PAD_BOTH;

        return self::str_pad($str, $length, $pad_str, $pad_type, $encoding);
    }
6777
6778
    /**
6779
     * Returns a new string of a given length such that the beginning of the
6780
     * string is padded. Alias for "UTF8::str_pad()" with a $pad_type of 'left'.
6781
     *
6782
     * @param string $str
6783
     * @param int    $length   <p>Desired string length after padding.</p>
6784
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
6785
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6786
     *
6787
     * @return string
6788
     *                <p>The string with left padding.</p>
6789
     */
6790 7
    public static function str_pad_left(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        // Fixed pad type; everything else is forwarded unchanged.
        $pad_type = \STR_PAD_LEFT;

        return self::str_pad($str, $length, $pad_str, $pad_type, $encoding);
    }
6804
6805
    /**
6806
     * Returns a new string of a given length such that the end of the string
6807
     * is padded. Alias for "UTF8::str_pad()" with a $pad_type of 'right'.
6808
     *
6809
     * @param string $str
6810
     * @param int    $length   <p>Desired string length after padding.</p>
6811
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
6812
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6813
     *
6814
     * @return string
6815
     *                <p>The string with right padding.</p>
6816
     */
6817 7
    public static function str_pad_right(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        // Fixed pad type; everything else is forwarded unchanged.
        $pad_type = \STR_PAD_RIGHT;

        return self::str_pad($str, $length, $pad_str, $pad_type, $encoding);
    }
6831
6832
    /**
6833
     * Repeat a string.
6834
     *
6835
     * @param string $str        <p>
6836
     *                           The string to be repeated.
6837
     *                           </p>
6838
     * @param int    $multiplier <p>
6839
     *                           Number of time the input string should be
6840
     *                           repeated.
6841
     *                           </p>
6842
     *                           <p>
6843
     *                           multiplier has to be greater than or equal to 0.
6844
     *                           If the multiplier is set to 0, the function
6845
     *                           will return an empty string.
6846
     *                           </p>
6847
     *
6848
     * @return string
6849
     *                <p>The repeated string.</p>
6850
     */
6851 9
    public static function str_repeat(string $str, int $multiplier): string
    {
        // The input is passed through self::filter() first; the repetition
        // itself is done by the native function.
        return \str_repeat(self::filter($str), $multiplier);
    }
6857
6858
    /**
6859
     * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
6860
     *
6861
     * Replace all occurrences of the search string with the replacement string
6862
     *
6863
     * @see http://php.net/manual/en/function.str-replace.php
6864
     *
6865
     * @param mixed $search  <p>
6866
     *                       The value being searched for, otherwise known as the needle.
6867
     *                       An array may be used to designate multiple needles.
6868
     *                       </p>
6869
     * @param mixed $replace <p>
6870
     *                       The replacement value that replaces found search
6871
     *                       values. An array may be used to designate multiple replacements.
6872
     *                       </p>
6873
     * @param mixed $subject <p>
6874
     *                       The string or array being searched and replaced on,
6875
     *                       otherwise known as the haystack.
6876
     *                       </p>
6877
     *                       <p>
6878
     *                       If subject is an array, then the search and
6879
     *                       replace is performed with every entry of
6880
     *                       subject, and the return value is an array as
6881
     *                       well.
6882
     *                       </p>
6883
     * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles
6884
     *
6885
     * @return mixed this function returns a string or an array with the replaced values
6886
     */
6887 12
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        // Straight pass-through to the native function. $count is taken by
        // reference, so the native call writes the replacement count into the
        // caller's variable.
        /**
         * @psalm-suppress PossiblyNullArgument
         */
        $result = \str_replace($search, $replace, $subject, $count);

        return $result;
    }
6903
6904
    /**
6905
     * Replaces $search from the beginning of string with $replacement.
6906
     *
6907
     * @param string $str         <p>The input string.</p>
6908
     * @param string $search      <p>The string to search for.</p>
6909
     * @param string $replacement <p>The replacement.</p>
6910
     *
6911
     * @return string
6912
     *                <p>A string after the replacements.</p>
6913
     */
6914 17
    public static function str_replace_beginning(
        string $str,
        string $search,
        string $replacement
    ): string {
        // An empty $search matches the (empty) start of every string, so the
        // replacement goes IN FRONT of $str. This also covers $str === '',
        // where the result is just $replacement.
        if ($search === '') {
            return $replacement . $str;
        }

        // Byte-wise prefix test. Unlike strpos(...) === 0 this looks only at
        // the first strlen($search) bytes instead of scanning all of $str on a
        // miss. A match on whole UTF-8 sequences at offset 0 cannot split a
        // character, so byte functions are safe here.
        $search_length = \strlen($search);
        if (\strncmp($str, $search, $search_length) === 0) {
            return $replacement . \substr($str, $search_length);
        }

        // $str does not start with $search: returned unchanged.
        return $str;
    }
6939
6940
    /**
6941
     * Replaces $search from the ending of string with $replacement.
6942
     *
6943
     * @param string $str         <p>The input string.</p>
6944
     * @param string $search      <p>The string to search for.</p>
6945
     * @param string $replacement <p>The replacement.</p>
6946
     *
6947
     * @return string
6948
     *                <p>A string after the replacements.</p>
6949
     */
6950 17
    public static function str_replace_ending(
        string $str,
        string $search,
        string $replacement
    ): string {
        // An empty $search matches the (empty) end of every string, so the
        // replacement is appended. This also covers $str === ''.
        if ($search === '') {
            return $str . $replacement;
        }

        // Number of leading bytes that survive if $search is the suffix.
        // Negative means $search is longer than $str and cannot match. That
        // case must be rejected before any offset-based call: an offset outside
        // the haystack is a warning on PHP 7 and a ValueError on PHP 8.
        $keep_length = \strlen($str) - \strlen($search);
        if ($keep_length < 0) {
            return $str;
        }

        // Byte-wise comparison of the tail of $str against $search. A match on
        // whole UTF-8 sequences cannot split a character.
        if (\substr_compare($str, $search, $keep_length) === 0) {
            return \substr($str, 0, $keep_length) . $replacement;
        }

        // $str does not end with $search: returned unchanged.
        return $str;
    }
6975
6976
    /**
     * Replace only the first occurrence of "$search" in "$subject" with "$replace".
     *
     * The subject is returned unchanged when the search term is not found.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The term to insert instead.</p>
     * @param string $subject <p>The string to operate on.</p>
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(
        string $search,
        string $replace,
        string $subject
    ): string {
        $first_position = self::strpos($subject, $search);
        if ($first_position === false) {
            return $subject;
        }

        $search_length = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $first_position, $search_length);
    }
7008
7009
    /**
     * Replace only the last occurrence of "$search" in "$subject" with "$replace".
     *
     * The subject is returned unchanged when the search term is not found.
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The term to insert instead.</p>
     * @param string $subject <p>The string to operate on.</p>
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $last_position = self::strrpos($subject, $search);
        if ($last_position === false) {
            return $subject;
        }

        $search_length = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $last_position, $search_length);
    }
7040
7041
    /**
     * Shuffles all the characters in the string.
     *
     * PS: uses \shuffle(), a random algorithm which is weak for cryptography purposes
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shuffled string.</p>
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        // Nothing to shuffle. This also avoids \range(0, -1), which does not
        // produce an empty list but [0, -1].
        if ($str === '') {
            return '';
        }

        // Decide once which code path is used: native "mb_*" calls for the
        // default encoding, the class helpers for everything else.
        $is_utf8 = $encoding === 'UTF-8';

        if ($is_utf8) {
            $length = (int) \mb_strlen($str);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $length = (int) self::strlen($str, $encoding);
        }

        $indexes = \range(0, $length - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($indexes);

        // Rebuild the string by picking one character per shuffled index.
        $shuffled_str = '';
        foreach ($indexes as $index) {
            $char = $is_utf8
                ? \mb_substr($str, $index, 1)
                : self::substr($str, $index, 1, $encoding);

            if ($char !== false) {
                $shuffled_str .= $char;
            }
        }

        return $shuffled_str;
    }
7088
7089
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. If $end is negative, it is computed from the end
     * of the string.
     *
     * An $end that is non-negative and not greater than $start yields an empty string.
     *
     * @param string   $str
     * @param int      $start    <p>Initial index from which to begin extraction.</p>
     * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      <p>The extracted substring, as returned by the underlying substr call.</p>
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        // The default encoding goes straight to the native "mb_*" functions,
        // everything else is routed through the class helpers.
        $use_native_mb = $encoding === 'UTF-8';
        if (!$use_native_mb) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // An end index at or before the start index selects nothing.
        if ($end !== null && $end >= 0 && $end <= $start) {
            return '';
        }

        if ($end === null || $end < 0) {
            $total_length = $use_native_mb
                ? (int) \mb_strlen($str)
                : (int) self::strlen($str, $encoding);

            // No end: take "everything"; negative end: count back from the end.
            $length = $end === null ? $total_length : $total_length + $end - $start;
        } else {
            $length = $end - $start;
        }

        if ($use_native_mb) {
            return \mb_substr($str, $start, $length);
        }

        return self::substr($str, $start, $length, $encoding);
    }
7138
7139
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Hyphens and whitespace become underscores, every number / uppercase
     * letter is prefixed with an underscore (uppercase letters are lower-cased),
     * repeated underscores are collapsed and the result is trimmed.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>A string in snake_case.</p>
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $normalized = self::normalize_whitespace($str);
        $str = \str_replace('-', '_', $normalized);

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        /**
         * @param string[] $matches
         *
         * @return string
         */
        $separate_match = static function (array $matches) use ($encoding): string {
            $char = $matches[1];

            // A plain ASCII digit survives the int round-trip and gets
            // surrounded by underscores on both sides.
            if ((string) (int) $char === $char) {
                return '_' . $char . '_';
            }

            $lower = $encoding === 'UTF-8'
                ? \mb_strtolower($char)
                : self::strtolower($char, $encoding);

            return '_' . $lower;
        };

        // NOTE(review): the "|" inside the character class is a literal pipe,
        // not an alternation, so "|" is matched here as well - confirm intent.
        $str = (string) \preg_replace_callback('/([\\p{N}|\\p{Lu}])/u', $separate_match, $str);

        $patterns = [
            '/\\s+/u',           // convert spaces to "_"
            '/^\\s+|\\s+$/u', // trim leading & trailing spaces
            '/_+/',                 // remove double "_"
        ];
        $replacements = ['_', '', '_'];

        $str = (string) \preg_replace($patterns, $replacements, $str);

        // trim leading & trailing "_" + whitespace
        return \trim(\trim($str, '_'));
    }
7204
7205
    /**
     * Sort all characters according to code points.
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @return string
     *                <p>A string of sorted characters.</p>
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $code_points = self::codepoints($str);

        if ($unique) {
            // Flipping twice collapses duplicate values into a single entry.
            $code_points = \array_flip(\array_flip($code_points));
        }

        if (!$desc) {
            \asort($code_points);
        } else {
            \arsort($code_points);
        }

        return self::string($code_points);
    }
7231
7232
    /**
     * Convert a string to an array of Unicode characters.
     *
     * If an array is passed, every element is split recursively and the
     * array of results is returned.
     *
     * @param int|int[]|string|string[] $str                     <p>The string to split into array.</p>
     * @param int                       $length                  [optional] <p>Max character length of each array
     *                                                           element.</p>
     * @param bool                      $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool                      $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                                           "mb_substr"</p>
     *
     * @return array
     *                  <p>An array containing chunks of the input. Empty if $length is lower than 1
     *                  or the input is an empty string.</p>
     */
    public static function str_split(
        $str,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        if (\is_array($str) === true) {
            // Split every element on its own; assigning by key avoids a
            // dangling reference after the loop.
            foreach ($str as $key => $value) {
                $str[$key] = self::str_split(
                    $value,
                    $length,
                    $clean_utf8,
                    $try_to_use_mb_functions
                );
            }

            return $str;
        }

        // init
        $str = (string) $str;

        if ($str === '') {
            return [];
        }

        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        if (
            $try_to_use_mb_functions === true
            &&
            self::$SUPPORT['mbstring'] === true
        ) {
            if (Bootup::is_php('7.4')) {
                // mb_str_split() may report a failure via "false" (PHP 7.4);
                // the declared return type only allows arrays.
                $chunks = \mb_str_split($str, $length);

                return \is_array($chunks) ? $chunks : [];
            }

            $i_max = \mb_strlen($str);
            if ($i_max <= 127) {
                // short strings: one "mb_substr" call per character
                $ret = [];
                for ($i = 0; $i < $i_max; ++$i) {
                    $ret[] = \mb_substr($str, $i, 1);
                }
            } else {
                // longer strings: let PCRE collect the characters in one pass
                $return_array = [];
                \preg_match_all('/./us', $str, $return_array);
                $ret = $return_array[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $return_array = [];
            \preg_match_all('/./us', $str, $return_array);
            $ret = $return_array[0] ?? [];
        } else {

            // fallback: walk the bytes and group them by the UTF-8 lead byte;
            // bytes that do not form a valid 1-4 byte sequence are not added.

            $ret = [];
            $len = \strlen($str);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($str[$i] & "\x80") === "\x00") {
                    // 1 byte: 0xxxxxxx
                    $ret[] = $str[$i];
                } elseif (
                    isset($str[$i + 1])
                    &&
                    ($str[$i] & "\xE0") === "\xC0"
                ) {
                    // 2 bytes: 110xxxxx 10xxxxxx
                    if (($str[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $str[$i] . $str[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($str[$i + 2])
                    &&
                    ($str[$i] & "\xF0") === "\xE0"
                ) {
                    // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($str[$i + 3])
                    &&
                    ($str[$i] & "\xF8") === "\xF0"
                ) {
                    // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            // Group the single characters and glue every group together. The
            // callback takes its argument by value: array_map() does not pass
            // references, and PHP 8 warns about by-reference callbacks.
            return \array_map(
                static function (array $item): string {
                    return \implode('', $item);
                },
                \array_chunk($ret, $length)
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
7375
7376
    /**
     * Splits the string with the provided regular expression, returning an
     * array of strings. An optional integer $limit will truncate the
     * results.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @return string[]
     *                  <p>An array of strings. Empty if $limit is 0 or the split itself fails;
     *                  the unchanged input as single element if $pattern is empty.</p>
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $parts = \mb_split($pattern, $str);

            // mb_split() reports e.g. an invalid pattern via "false"; mirror
            // the handling of a failed preg_split() below.
            if ($parts === false) {
                return [];
            }

            // A positive limit truncates the result, it does not merge the rest
            // into the last element like the native "limit" parameter would.
            if ($limit > 0) {
                return \array_slice($parts, 0, $limit);
            }

            return $parts;
        }

        // Ask for one element more than requested: the surplus element holds
        // the unsplit remainder and is dropped again below.
        if ($limit > 0) {
            ++$limit;
        } else {
            $limit = -1;
        }

        // NOTE(review): the pattern is quoted here and therefore matched
        // literally, while the mb_split() branch above treats it as a regex -
        // confirm which behavior is intended.
        $array = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);

        if ($array === false) {
            return [];
        }

        if ($limit > 0 && \count($array) === $limit) {
            \array_pop($array);
        }

        return $array;
    }
7438
7439
    /**
     * Check if the string starts with the given substring.
     *
     * The comparison is byte-wise and case-sensitive. An empty $needle
     * always matches, even for an empty $haystack.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // Compare only the first strlen($needle) bytes instead of searching
        // the whole haystack for the needle.
        return \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
7459
7460
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty $str or an empty list of $substrings never matches
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with $substring
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '') {
            return false;
        }

        if ($substrings === []) {
            return false;
        }

        // The list is only read, so iterate by value.
        foreach ($substrings as $substring) {
            if (self::str_starts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
7488
7489
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // Use the class helper here as well, like the other
        // "str_substr_*_separator" methods do for non-default encodings.
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
7528
7529
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $last_position = \mb_strrpos($str, $separator);

            return $last_position === false
                ? ''
                : (string) \mb_substr($str, $last_position + (int) \mb_strlen($separator));
        }

        $last_position = self::strrpos($str, $separator, 0, $encoding);
        if ($last_position === false) {
            return '';
        }

        $tail_start = $last_position + (int) self::strlen($separator, $encoding);

        return (string) self::substr($str, $tail_start, null, $encoding);
    }
7568
7569
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $first_position = \mb_strpos($str, $separator);

            return $first_position === false
                ? ''
                : (string) \mb_substr($str, 0, $first_position);
        }

        $first_position = self::strpos($str, $separator, 0, $encoding);
        if ($first_position === false) {
            return '';
        }

        return (string) self::substr($str, 0, $first_position, $encoding);
    }
7612
7613
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * An empty string is returned if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $last_position = \mb_strrpos($str, $separator);

            return $last_position === false
                ? ''
                : (string) \mb_substr($str, 0, $last_position);
        }

        $last_position = self::strrpos($str, $separator, 0, $encoding);
        if ($last_position === false) {
            return '';
        }

        // The encoding name is only normalized for the final substr call.
        $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr($str, 0, $last_position, $normalized_encoding);
    }
7655
7656
    /**
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
     *
     * Without "$before_needle" the result starts with the needle itself. An
     * empty string is returned if $str or $needle is empty, or if the needle
     * is not found.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // "false" is the native default for the third argument, so a single
        // call covers both the "before" and the "after" variant.
        $found = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $before_needle)
            : self::strstr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
7700
7701
    /**
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
     *
     * Without "$before_needle" the result starts with the needle itself. An
     * empty string is returned if $str or $needle is empty, or if the needle
     * is not found.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // "false" is the native default for the third argument, so a single
        // call covers both the "before" and the "after" variant.
        $found = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $before_needle)
            : self::strrchr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
7745
7746
    /**
     * Wraps $str in the given substring.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @return string
     *                <p>A string with the substring both prepended and appended.</p>
     */
    public static function str_surround(string $str, string $substring): string
    {
        $pieces = [$substring, $str, $substring];

        return \implode('', $pieces);
    }
7759
7760
    /**
     * Returns a string with the first letter of each word capitalized and the
     * rest of each word lower-cased. The input is trimmed first, unless
     * $use_trim_first is disabled.
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                        [optional] <p>An array of words not to capitalize or null.
     *                                                           Default: null</p>
     * @param string              $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null         $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                           tr</p>
     * @param bool                $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                           ß</p>
     * @param bool                $use_trim_first                [optional] <p>true === trim the input string, first</p>
     * @param string|null         $word_define_chars             [optional] <p>A string of chars that will be used as
     *                                                           word separators in addition to whitespace.</p>
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false,
        bool $use_trim_first = true,
        string $word_define_chars = null
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($use_trim_first === true) {
            $str = \trim($str);
        }

        if ($clean_utf8 === true) {
            $str = self::clean($str);
        }

        // The plain "mb_*" functions are only used if no language specific
        // or length preserving case mapping was requested.
        $use_mb_functions = $lang === null && $try_to_keep_the_string_length === false;

        // Compare explicitly instead of relying on truthiness, otherwise the
        // separator string "0" would be ignored.
        if ($word_define_chars !== null && $word_define_chars !== '') {
            $word_define_chars = \preg_quote($word_define_chars, '/');
        } else {
            $word_define_chars = '';
        }

        // A "word" is every run of chars that is neither whitespace nor one of
        // the extra separator chars.
        $str = (string) \preg_replace_callback(
            '/([^\\s' . $word_define_chars . ']+)/u',
            static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                    return $match[0];
                }

                if ($use_mb_functions === true) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                               . \mb_strtolower(\mb_substr($match[0], 1));
                    }

                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
                }

                return self::ucfirst(
                    self::strtolower(
                        $match[0],
                        $encoding,
                        false,
                        $lang,
                        $try_to_keep_the_string_length
                    ),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            },
            $str
        );

        return $str;
    }
7850
7851
    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize. The words are taken literally
     *                         (regex meta characters are escaped), empty entries are skipped.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize_for_humans(
        string $str,
        array $ignore = [],
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // built-in "small words"; these entries are regex fragments on purpose
        $small_words = [
            '(?<!q&)a',
            'an',
            'and',
            'as',
            'at(?!&t)',
            'but',
            'by',
            'en',
            'for',
            'if',
            'in',
            'of',
            'on',
            'or',
            'the',
            'to',
            'v[.]?',
            'via',
            'vs[.]?',
        ];

        // User supplied words are plain text: escape them so that a word like "(" or "a|b"
        // can not break (or silently alter) the patterns below. All patterns use the "x"
        // flag, therefore literal whitespace has to be escaped as well.
        foreach ($ignore as $ignored_word) {
            $ignored_word = (string) $ignored_word;
            if ($ignored_word === '') {
                // an empty alternative would match everywhere
                continue;
            }

            $small_words[] = (string) \preg_replace(
                '/[ \t\n\r\f\x0B]/',
                '\\\\$0',
                \preg_quote($ignored_word, '~')
            );
        }

        $small_words_rx = \implode('|', $small_words);
        $apostrophe_rx = '(?x: [\'’] [[:lower:]]* )?';

        $str = \trim($str);

        // an all-caps (or caseless) input is lower-cased first, so the rules below can apply
        if (self::has_lowercase($str) === false) {
            $str = self::strtolower($str, $encoding);
        }

        // the main substitutions
        $str = (string) \preg_replace_callback(
            '~\\b (_*) (?:                                                         # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_rx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $small_words_rx . ' ) ' . $apostrophe_rx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                // preserve leading underscore
                $str = $matches[1];
                if ($matches[2]) {
                    // preserve URLs, domains, emails and file paths
                    $str .= $matches[2];
                } elseif ($matches[3]) {
                    // lower-case small words
                    $str .= self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // capitalize word w/o internal caps
                    $str .= static::ucfirst($matches[4], $encoding);
                } else {
                    // preserve other kinds of word (iPhone)
                    $str .= $matches[5];
                }
                // preserve trailing underscore
                $str .= $matches[6];

                return $str;
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $small_words_rx . ' ) \\b # ...followed by small word
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . static::ucfirst($matches[2], $encoding);
            },
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\\b ( ' . $small_words_rx . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return static::ucfirst($matches[1], $encoding);
            },
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $small_words_rx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return static::ucfirst($matches[1], $encoding);
            },
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        // NOTE(review): the lookbehind below excludes an ellipsis ("…") although its inline
        // comment talks about a hyphen - kept unchanged, confirm against the original script.
        $str = (string) \preg_replace_callback(
            '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $small_words_rx . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . static::ucfirst($matches[2], $encoding);
            },
            $str
        );

        return $str;
    }
8029
8030
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of the input are converted (via their hexadecimal form) into
     * a string of "0" and "1". Leading zero bits of the whole result are
     * dropped, so an empty string or a string of NUL bytes yields "0".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      <p>false on error</p>
     */
    public static function str_to_binary(string $str)
    {
        /** @var array|false $value - needed for PhpStan (stubs error) */
        $value = \unpack('H*', $str);
        if ($value === false) {
            return false;
        }

        // Map every hex digit to its four bits. base_convert() is not used here because
        // it works on floats internally and loses precision for longer input.
        /** @noinspection OffsetOperationsInspection */
        $bits = \strtr(
            (string) $value[1],
            [
                '0' => '0000',
                '1' => '0001',
                '2' => '0010',
                '3' => '0011',
                '4' => '0100',
                '5' => '0101',
                '6' => '0110',
                '7' => '0111',
                '8' => '1000',
                '9' => '1001',
                'a' => '1010',
                'b' => '1011',
                'c' => '1100',
                'd' => '1101',
                'e' => '1110',
                'f' => '1111',
            ]
        );

        // keep the output format of the former implementation: no leading zeros, "0" for zero
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
8049
8050
    /**
     * Split a string into its lines.
     *
     * A line break is "\r\n", "\r" or "\n". Every single line break separates
     * two lines, so blank lines are kept as empty strings unless they are
     * removed via $remove_empty_values / $remove_short_values.
     *
     * @param string   $str
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
    {
        if ($str === '') {
            return $remove_empty_values === true ? [] : [''];
        }

        // "\r\n" has to come first, so that it counts as one line break; the former
        // pattern "[\r\n]{1,2}" also swallowed "\n\n" and thereby dropped blank lines.
        $line_break_rx = "\r\n|\r|\n";

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $return = \mb_split($line_break_rx, $str);
        } else {
            $return = \preg_split('/' . $line_break_rx . '/u', $str);
        }

        if ($return === false) {
            return $remove_empty_values === true ? [] : [''];
        }

        // nothing to filter
        if (
            $remove_short_values === null
            &&
            $remove_empty_values === false
        ) {
            return $return;
        }

        return self::reduce_string_array(
            $return,
            $remove_empty_values,
            $remove_short_values
        );
    }
8088
8089
    /**
     * Convert a string into an array of words.
     *
     * The string is split with PREG_SPLIT_DELIM_CAPTURE, so the result
     * alternates between the text between words (even indexes) and the
     * words themselves (odd indexes), as long as nothing is filtered out.
     *
     * @param string   $str
     * @param string   $char_list           <p>Additional chars for the definition of "words".</p>
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $char_list = '',
        bool $remove_empty_values = false,
        int $remove_short_values = null
    ): array {
        // result for "nothing to split" (empty input or a failing regex)
        $nothing_found = $remove_empty_values === true ? [] : [''];

        if ($str === '') {
            return $nothing_found;
        }

        // character class that describes the chars of a word (letters + $char_list)
        $word_chars = self::rxClass($char_list, '\pL');

        $parts = \preg_split("/({$word_chars}+(?:[\p{Pd}’']{$word_chars}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $nothing_found;
        }

        $filter_is_needed = $remove_empty_values !== false || $remove_short_values !== null;
        if ($filter_is_needed === false) {
            return $parts;
        }

        $reduced = self::reduce_string_array(
            $parts,
            $remove_empty_values,
            $remove_short_values
        );

        // make sure that every remaining value is a string (keys are preserved)
        return \array_map(
            static function ($item): string {
                return (string) $item;
            },
            $reduced
        );
    }
8136
8137
    /**
     * alias for "UTF8::to_ascii()"
     *
     * All arguments are passed through unchanged.
     *
     * @param string $str
     * @param string $unknown
     * @param bool   $strict
     *
     * @return string
     *
     * @see UTF8::to_ascii()
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
     */
    public static function str_transliterate(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        $ascii = self::to_ascii(
            $str,
            $unknown,
            $strict
        );

        return $ascii;
    }
8156
8157
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If truncating occurs and no room is left for the string itself (the
     * substring is at least as long as $length, or $length is not positive),
     * only $substring is returned - the same rule "str_truncate_safe()" uses.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // fast path: plain "mb_*" calls for UTF-8, the generic helpers for everything else
        $is_utf8 = $encoding === 'UTF-8';
        if ($is_utf8 === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str_length = $is_utf8 === true
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        // the string already fits
        if ($length >= $str_length) {
            return $str;
        }

        // reserve room for the substring that gets appended
        if ($substring !== '') {
            $length -= $is_utf8 === true
                ? (int) \mb_strlen($substring)
                : (int) self::strlen($substring, $encoding);
        }

        // A negative length would make substr() cut from the end of the string and
        // produce a result that is longer than requested.
        if ($length <= 0) {
            return $substring;
        }

        if ($is_utf8 === true) {
            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length) . $substring;
        }

        return (string) self::substr($str, 0, $length, $encoding) . $substring;
    }
8215
8216
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * For an empty string or a non-positive length only $substring is returned.
     *
     * @param string $str
     * @param int    $length                                 <p>Desired length of the truncated string.</p>
     * @param string $substring                              [optional] <p>The substring to append if it can fit. Default:
     *                                                       ''</p>
     * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>Default: false</p>
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignore_do_not_split_words_for_one_word = false
    ): string {
        if ($length <= 0 || $str === '') {
            return $substring;
        }

        if ($encoding !== 'UTF-8') {
            // generic path: use the encoding aware helpers of this class
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            if ((int) self::strlen($str, $encoding) <= $length) {
                return $str;
            }

            // reserve room for the substring that gets appended
            $length -= (int) self::strlen($substring, $encoding);
            if ($length <= 0) {
                return $substring;
            }

            $head = self::substr($str, 0, $length, $encoding);
            if ($head === false) {
                return '';
            }

            // a space directly behind the cut means that no word was split
            $next_space = self::strpos($str, ' ', $length - 1, $encoding);
            if ($next_space === $length) {
                return $head . $substring;
            }

            // otherwise go back to the last space within the truncated part
            $previous_space = self::strrpos($head, ' ', 0, $encoding);
            $cut_back = $previous_space !== false
                        ||
                        ($next_space !== false && $ignore_do_not_split_words_for_one_word === false);
            if ($cut_back) {
                $head = (string) self::substr($head, 0, (int) $previous_space, $encoding);
            }

            return $head . $substring;
        }

        // UTF-8 path: plain "mb_*" calls
        if ((int) \mb_strlen($str) <= $length) {
            return $str;
        }

        // reserve room for the substring that gets appended
        $length -= (int) \mb_strlen($substring);
        if ($length <= 0) {
            return $substring;
        }

        /** @var false|string $head - needed for PhpStan (stubs error) */
        $head = \mb_substr($str, 0, $length);
        if ($head === false) {
            return '';
        }

        // a space directly behind the cut means that no word was split
        $next_space = \mb_strpos($str, ' ', $length - 1);
        if ($next_space === $length) {
            return $head . $substring;
        }

        // otherwise go back to the last space within the truncated part
        $previous_space = \mb_strrpos($head, ' ', 0);
        $cut_back = $previous_space !== false
                    ||
                    ($next_space !== false && $ignore_do_not_split_words_for_one_word === false);
        if ($cut_back) {
            $head = (string) \mb_substr($head, 0, (int) $previous_space);
        }

        return $head . $substring;
    }
8311
8312
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * This is "UTF8::str_delimit()" with "_" as delimiter.
     *
     * @param string $str
     *
     * @return string
     *                <p>The underscored string.</p>
     */
    public static function str_underscored(string $str): string
    {
        $underscored = self::str_delimit($str, '_');

        return $underscored;
    }
8327
8328
    /**
     * Returns an UpperCamelCase version of the supplied string. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * The string is camelized first, afterwards its first character is upper-cased.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>A string in UpperCamelCase.</p>
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // NOTE(review): $clean_utf8, $lang and $try_to_keep_the_string_length are only
        // handed to ucfirst(), not to str_camelize() - confirm that this is intended.
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst(
            $camelized,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }
8351
8352
    /**
     * alias for "UTF8::ucfirst()"
     *
     * All arguments are passed through unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @return string
     *
     * @see UTF8::ucfirst()
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
     */
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $upper_first = self::ucfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $upper_first;
    }
8381
8382
    /**
     * Get the number of words in a specific string.
     *
     * The work is done by "UTF8::str_to_words()", whose result holds the words
     * at the odd indexes and the text between the words at the even indexes.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $format    [optional] <p>
     *                          <strong>0</strong> => return a number of words (default)<br>
     *                          <strong>1</strong> => return an array of words<br>
     *                          <strong>2</strong> => return an array of words with word-offset as key
     *                          </p>
     * @param string $char_list [optional] <p>Additional chars that contains to words and do not start a new word.</p>
     *
     * @return int|string[] The number of words in the string
     */
    public static function str_word_count(string $str, int $format = 0, string $char_list = '')
    {
        $parts = self::str_to_words($str, $char_list);
        $parts_count = \count($parts);

        // every format other than 1 and 2: only count the words
        if ($format !== 1 && $format !== 2) {
            return (int) (($parts_count - 1) / 2);
        }

        $words = [];

        if ($format === 1) {
            // plain list of words
            $index = 1;
            while ($index < $parts_count) {
                $words[] = $parts[$index];
                $index += 2;
            }

            return $words;
        }

        // format 2: the key is the offset of the word within the string
        $word_offset = (int) self::strlen($parts[0]);
        $index = 1;
        while ($index < $parts_count) {
            $words[$word_offset] = $parts[$index];

            // skip the word itself and the text that follows it
            $word_offset += (int) self::strlen($parts[$index]) + (int) self::strlen($parts[$index + 1]);
            $index += 2;
        }

        return $words;
    }
8419
8420
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp()
     *
     * Both strings are case-folded via "UTF8::strtocasefold()" and then
     * compared with "UTF8::strcmp()".
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): int {
        // same case-folding options for both sides
        $folded_str1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_str2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded_str1, $folded_str2);
    }
8458
8459
    /**
     * alias for "UTF8::strstr()"
     *
     * All arguments are passed through unchanged.
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $clean_utf8
     *
     * @return false|string
     *
     * @see UTF8::strstr()
     * @deprecated <p>please use "UTF8::strstr()"</p>
     */
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $found = self::strstr($haystack, $needle, $before_needle, $encoding, $clean_utf8);

        return $found;
    }
8488
8489
    /**
     * Case-sensitive string comparison.
     *
     * Both strings are normalized (NFD) before they are compared, so
     * canonically equivalent strings are equal. A string that can not be
     * normalized (e.g. invalid UTF-8) is compared as it is.
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        $normalized_str1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        $normalized_str2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // Normalizer::normalize() returns false on failure; without the fallback two
        // different, non-normalizable strings would both be compared as "".
        return \strcmp(
            \is_string($normalized_str1) ? $normalized_str1 : $str1,
            \is_string($normalized_str2) ? $normalized_str2 : $str2
        );
    }
8511
8512
    /**
     * Find length of initial segment not matching mask.
     *
     * If $offset and / or $length is given, only that part of the string is
     * examined. An empty $char_list results in the length of the whole string.
     *
     * @param string $str
     * @param string $char_list
     * @param int    $offset
     * @param int    $length
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     */
    public static function strcspn(
        string $str,
        string $char_list,
        int $offset = null,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        $encoding_is_known = $encoding === 'UTF-8' || $encoding === 'CP850';
        if ($encoding_is_known === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($char_list === '') {
            return (int) self::strlen($str, $encoding);
        }

        $use_only_a_part = $offset !== null || $length !== null;
        if ($use_only_a_part) {
            // a length of "null" means "up to the end of the string"
            /** @noinspection UnnecessaryCastingInspection */
            $part = $encoding === 'UTF-8'
                ? \mb_substr($str, (int) $offset, $length)
                : self::substr($str, (int) $offset, $length, $encoding);

            if ($part === false) {
                return 0;
            }

            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $str = $part;
        }

        if ($str === '') {
            return 0;
        }

        // lazily capture everything in front of the first char from the list
        $found = [];
        $has_match = \preg_match('/^(.*?)' . self::rxClass($char_list) . '/us', $str, $found);
        if (!$has_match) {
            // no char from the list in the string (or the regex failed)
            return (int) self::strlen($str, $encoding);
        }

        $segment_length = self::strlen($found[1], $encoding);

        return $segment_length === false ? 0 : $segment_length;
    }
8576
8577
    /**
     * alias for "UTF8::stristr()"
     *
     * All arguments are passed through unchanged.
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $clean_utf8
     *
     * @return false|string
     *
     * @see UTF8::stristr()
     * @deprecated <p>please use "UTF8::stristr()"</p>
     */
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $found = self::stristr($haystack, $needle, $before_needle, $encoding, $clean_utf8);

        return $found;
    }
8606
8607
    /**
     * Create a UTF-8 string from code points.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * Every value is cast to int, written as a numeric HTML entity and the
     * result is decoded via "UTF8::html_entity_decode()".
     *
     * @param array $array <p>Integer or Hexadecimal codepoints.</p>
     *
     * @return string
     *                <p>A UTF-8 encoded string.</p>
     */
    public static function string(array $array): string
    {
        if ($array === []) {
            return '';
        }

        // e.g. 228 => "&#228;"
        $entities = \array_map(
            static function ($code_point): string {
                return '&#' . (int) $code_point . ';';
            },
            $array
        );

        return self::html_entity_decode(\implode('', $entities), \ENT_QUOTES | \ENT_HTML5);
    }
8630
8631
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * The string is compared against every key of the internal BOM list.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     */
    public static function string_has_bom(string $str): bool
    {
        // Only the keys (the BOM byte sequences) are needed, so the shared static list
        // is iterated by value instead of by reference.
        foreach (\array_keys(self::$BOM) as $bom_string) {
            $bom_string = (string) $bom_string;

            // compare the prefix only, no need to search through the whole string
            if (\strncmp($str, $bom_string, \strlen($bom_string)) === 0) {
                return true;
            }
        }

        return false;
    }
8651
8652
    /**
8653
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
8654
     *
8655
     * @see http://php.net/manual/en/function.strip-tags.php
8656
     *
8657
     * @param string $str            <p>
8658
     *                               The input string.
8659
     *                               </p>
8660
     * @param string $allowable_tags [optional] <p>
8661
     *                               You can use the optional second parameter to specify tags which should
8662
     *                               not be stripped.
8663
     *                               </p>
8664
     *                               <p>
8665
     *                               HTML comments and PHP tags are also stripped. This is hardcoded and
8666
     *                               can not be changed with allowable_tags.
8667
     *                               </p>
8668
     * @param bool   $clean_utf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
8669
     *
8670
     * @return string
8671
     *                <p>The stripped string.</p>
8672
     */
8673 4
    public static function strip_tags(
        string $str,
        string $allowable_tags = null,
        bool $clean_utf8 = false
    ): string {
        if ($str === '') {
            return '';
        }

        // Optional pre-pass through self::clean() before the tags are stripped.
        $input = $clean_utf8 === true ? self::clean($str) : $str;

        // Without an allow-list the native function is called with one argument
        // only, rather than being handed an explicit null.
        return $allowable_tags === null
            ? \strip_tags($input)
            : \strip_tags($input, $allowable_tags);
    }
8692
8693
    /**
8694
     * Strip all whitespace characters. This includes tabs and newline
8695
     * characters, as well as multibyte whitespace such as the thin space
8696
     * and ideographic space.
8697
     *
8698
     * @param string $str
8699
     *
8700
     * @return string
8701
     */
8702 36
    public static function strip_whitespace(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // Every run of characters in the POSIX "space" class is removed; the "u"
        // modifier makes the pattern operate on UTF-8 input.
        $pattern = '/[[:space:]]+/u';
        $stripped = \preg_replace($pattern, '', $str);

        // preg_replace() yields null on failure; the cast turns that into ''.
        return (string) $stripped;
    }
8710
8711
    /**
8712
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
8713
     *
8714
     * @see http://php.net/manual/en/function.mb-stripos.php
8715
     *
8716
     * @param string $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
8717
     * @param string $needle     <p>The string to find in haystack.</p>
8718
     * @param int    $offset     [optional] <p>The position in haystack to start searching.</p>
8719
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
8720
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
8721
     *
8722
     * @return false|int
8723
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
8724
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
8725
     */
8726 24
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        $has_mbstring = self::$SUPPORT['mbstring'] === true;

        // Fast path: mbstring + UTF-8 needs no encoding normalization.
        if ($has_mbstring && $encoding === 'UTF-8') {
            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($has_mbstring) {
            return \mb_stripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback via intl
        //
        // grapheme_stripos() handles neither other encodings nor a negative
        // offset; a "not found" result falls through to the next strategy.
        if ($encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true) {
            $position = \grapheme_stripos($haystack, $needle, $offset);
            if ($position !== false) {
                return $position;
            }
        }

        //
        // fallback for ascii only
        //
        if (ASCII::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both sides, then do a
        // case-sensitive search
        //
        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($folded_haystack, $folded_needle, $offset, $encoding);
    }
8786
8787
    /**
8788
     * Returns all of haystack starting from and including the first occurrence of needle to the end.
8789
     *
8790
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
8791
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
8792
     * @param bool   $before_needle [optional] <p>
8793
     *                              If <b>TRUE</b>, it returns the part of the
8794
     *                              haystack before the first occurrence of the needle (excluding the needle).
8795
     *                              </p>
8796
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
8797
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
8798
     *
8799
     * @return false|string
8800
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
8801
     */
8802 12
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // Cleaning may have emptied the needle; in that case the haystack is
        // returned unchanged. The comparison must be against '' explicitly:
        // a loose "!$needle" test is also true for the perfectly valid needle
        // "0", which made e.g. stristr('a0b', '0') return 'a0b' instead of '0b'.
        if ($needle === '') {
            return $haystack;
        }

        //
        // fallback via mbstring
        //
        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // None of the remaining strategies converts between encodings.
        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //
        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //
        if (ASCII::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php: lazily capture everything in front of the
        // first case-insensitive occurrence of the (quoted) needle
        //
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // The length of the captured prefix is the start position of the needle.
        return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
    }
8871
8872
    /**
8873
     * Get the string length, not the byte-length!
8874
     *
8875
     * @see http://php.net/manual/en/function.mb-strlen.php
8876
     *
8877
     * @param string $str        <p>The string being checked for length.</p>
8878
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
8879
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
8880
     *
8881
     * @return false|int
8882
     *                   <p>
8883
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
8884
     *                   $encoding.
8885
     *                   (One multi-byte character counted as +1).
8886
     *                   <br>
8887
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
8888
     *                   chars.
8889
     *                   </p>
8890
     */
8891 173
    public static function strlen(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($str === '') {
            return 0;
        }

        // 'UTF-8' and 'CP850' are used verbatim; anything else is normalized.
        $is_verbatim_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_verbatim_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // "mb_strlen" and "\iconv_strlen" return a wrong length
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        //
        // fallback via mbstring
        //
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strlen($str)
                : \mb_strlen($str, $encoding);
        }

        //
        // fallback for binary || ascii only
        //
        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strlen($str);
        }

        // Neither mbstring nor iconv is available to interpret a non-UTF-8
        // encoding: warn, then continue with the remaining strategies.
        if (
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
            &&
            $encoding !== 'UTF-8'
        ) {
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //
        if (self::$SUPPORT['iconv'] === true) {
            $iconv_length = \iconv_strlen($str, $encoding);
            if ($iconv_length !== false) {
                return $iconv_length;
            }
        }

        //
        // fallback via intl
        //
        // INFO: "grapheme_strlen()" can't handle other encodings
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $grapheme_length = \grapheme_strlen($str);
            if ($grapheme_length !== null) {
                return $grapheme_length;
            }
        }

        //
        // fallback for ascii only
        //
        if (ASCII::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php: count the matches of "any one character"
        //
        \preg_match_all('/./us', $str, $parts);

        $char_count = \count($parts[0]);

        // The input is known to be non-empty here, so zero matches is
        // reported as false rather than as a length of 0.
        return $char_count === 0 ? false : $char_count;
    }
8991
8992
    /**
8993
     * Get string length in byte.
8994
     *
8995
     * @param string $str
8996
     *
8997
     * @return int
8998
     */
8999
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // When the mbstring function overload is flagged as active, the length
        // is taken via mb_strlen() with the 8-bit 'CP850' charset; otherwise
        // plain strlen() is used.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850') // 8-BIT
            : \strlen($str);
    }
9012
9013
    /**
9014
     * Case-insensitive string comparisons using a "natural order" algorithm.
9015
     *
9016
     * INFO: natural order version of UTF8::strcasecmp()
9017
     *
9018
     * @param string $str1     <p>The first string.</p>
9019
     * @param string $str2     <p>The second string.</p>
9020
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9021
     *
9022
     * @return int
9023
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
9024
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
9025
     *             <strong>0</strong> if they are equal
9026
     */
9027 2
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        // Case-fold both operands with identical settings, then defer to the
        // case-sensitive natural-order comparison.
        $folded_first = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_second = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded_first, $folded_second);
    }
9034
9035
    /**
9036
     * String comparisons using a "natural order" algorithm
9037
     *
9038
     * INFO: natural order version of UTF8::strcmp()
9039
     *
9040
     * @see http://php.net/manual/en/function.strnatcmp.php
9041
     *
9042
     * @param string $str1 <p>The first string.</p>
9043
     * @param string $str2 <p>The second string.</p>
9044
     *
9045
     * @return int
9046
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
9047
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
9048
     *             <strong>0</strong> if they are equal
9049
     */
9050 4
    public static function strnatcmp(string $str1, string $str2): int
    {
        // Identical strings are equal; no folding needed.
        if ($str1 === $str2) {
            return 0;
        }

        // Both sides go through strtonatfold() before the native
        // natural-order comparison.
        $first = (string) self::strtonatfold($str1);
        $second = (string) self::strtonatfold($str2);

        return \strnatcmp($first, $second);
    }
9061
9062
    /**
9063
     * Case-insensitive string comparison of the first n characters.
9064
     *
9065
     * @see http://php.net/manual/en/function.strncasecmp.php
9066
     *
9067
     * @param string $str1     <p>The first string.</p>
9068
     * @param string $str2     <p>The second string.</p>
9069
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
9070
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9071
     *
9072
     * @return int
9073
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
9074
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
9075
     *             <strong>0</strong> if they are equal
9076
     */
9077 2
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // Case-fold both operands using the caller's encoding ...
        $folded_first = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_second = self::strtocasefold($str2, true, false, $encoding, null, false);

        // ... and forward that same encoding to strncmp(), so the first $len
        // characters are cut in the encoding the strings were folded in.
        // Previously it was dropped here and strncmp() always fell back to its
        // 'UTF-8' default. Behaviour for the default encoding is unchanged.
        return self::strncmp($folded_first, $folded_second, $len, $encoding);
    }
9089
9090
    /**
9091
     * String comparison of the first n characters.
9092
     *
9093
     * @see http://php.net/manual/en/function.strncmp.php
9094
     *
9095
     * @param string $str1     <p>The first string.</p>
9096
     * @param string $str2     <p>The second string.</p>
9097
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
9098
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9099
     *
9100
     * @return int
9101
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
9102
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
9103
     *             <strong>0</strong> if they are equal
9104
     */
9105 4
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // 'UTF-8' and 'CP850' are used verbatim; anything else is normalized.
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Non-UTF-8: truncate through the encoding-aware substr() wrapper.
        if ($encoding !== 'UTF-8') {
            $head1 = (string) self::substr($str1, 0, $len, $encoding);
            $head2 = (string) self::substr($str2, 0, $len, $encoding);

            return self::strcmp($head1, $head2);
        }

        // UTF-8: truncate directly via mb_substr().
        $head1 = (string) \mb_substr($str1, 0, $len);
        $head2 = (string) \mb_substr($str2, 0, $len);

        return self::strcmp($head1, $head2);
    }
9125
9126
    /**
9127
     * Search a string for any of a set of characters.
9128
     *
9129
     * @see http://php.net/manual/en/function.strpbrk.php
9130
     *
9131
     * @param string $haystack  <p>The string where char_list is looked for.</p>
9132
     * @param string $char_list <p>This parameter is case-sensitive.</p>
9133
     *
9134
     * @return false|string string starting from the character found, or false if it is not found
9135
     */
9136 2
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        // Build a pattern from the character list via rxClass() and look for
        // the first match in the haystack.
        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $m)) {
            return false;
        }

        // Return everything from the first byte position of the matched text.
        $byte_offset = (int) \strpos($haystack, $m[0]);

        return \substr($haystack, $byte_offset);
    }
9148
9149
    /**
9150
     * Find the position of the first occurrence of a substring in a string.
9151
     *
9152
     * @see http://php.net/manual/en/function.mb-strpos.php
9153
     *
9154
     * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
9155
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
9156
     * @param int        $offset     [optional] <p>The search offset. If it is not specified, 0 is used.</p>
9157
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
9158
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
9159
     *
9160
     * @return false|int
9161
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
9162
     *                   string.<br> If needle is not found it returns false.
9163
     */
9164 53
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle, so an int is
        // converted via self::chr() first
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //
        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strpos($haystack, $needle, $offset);
            }

            return \mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //
        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //
        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //
        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // $offset is guaranteed to be non-negative by the guard above
            $return_tmp = \iconv_strpos($haystack, $needle, $offset, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //
        if (ASCII::is_ascii($haystack . $needle)) {
            return \strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // A negative offset counts from the end of the haystack. Translate it
        // into the equivalent absolute character position (clamped to 0) BEFORE
        // slicing. Previously the tail was sliced with the negative offset and
        // the offset was then reset to 0, so the returned position was relative
        // to the tail instead of the whole haystack.
        if ($offset < 0) {
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystack_tmp === false) {
            $haystack_tmp = '';
        }
        $haystack = (string) $haystack_tmp;

        // Byte position of the needle inside the sliced haystack ...
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        // ... converted to a character position and shifted by the offset.
        if ($pos) {
            return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        return $offset;
    }
9296
9297
    /**
9298
     * Find the position of the first occurrence of a substring in a string.
9299
     *
9300
     * @param string $haystack <p>
9301
     *                         The string being checked.
9302
     *                         </p>
9303
     * @param string $needle   <p>
9304
     *                         The string to search for in haystack.
9305
     *                         </p>
9306
     * @param int    $offset   [optional] <p>
9307
     *                         The search offset. If it is not specified, 0 is used.
9308
     *                         </p>
9309
     *
9310
     * @return false|int The numeric position of the first occurrence of needle in the
9311
     *                   haystack string. If needle is not found, it returns false.
9312
     */
9313
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // When the mbstring function overload is flagged as active, search via
        // mb_strpos() with the 8-bit 'CP850' charset; otherwise use strpos().
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strpos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strpos($haystack, $needle, $offset);
    }
9326
9327
    /**
9328
     * Find the last occurrence of a character in a string within another.
9329
     *
9330
     * @see http://php.net/manual/en/function.mb-strrchr.php
9331
     *
9332
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
9333
     * @param string $needle        <p>The string to find in haystack</p>
9334
     * @param bool   $before_needle [optional] <p>
9335
     *                              Determines which portion of haystack
9336
     *                              this function returns.
9337
     *                              If set to true, it returns all of haystack
9338
     *                              from the beginning to the last occurrence of needle.
9339
     *                              If set to false, it returns all of haystack
9340
     *                              from the last occurrence of needle to the end,
9341
     *                              </p>
9342
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
9343
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
9344
     *
9345
     * @return false|string the portion of haystack or false if needle is not found
9346
     */
9347 2
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // 'UTF-8' and 'CP850' are used verbatim; anything else is normalized.
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // fallback via mbstring
        //
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        // (native strrchr() has no "before needle" mode)
        //
        $is_single_byte = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($is_single_byte && $before_needle === false) {
            return \strrchr($haystack, $needle);
        }

        if (
            self::$SUPPORT['mbstring'] === false
            &&
            $encoding !== 'UTF-8'
        ) {
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv, or via vanilla php when iconv is missing.
        // Both search for the first character of the needle only and differ
        // solely in how the last position is located.
        //
        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }
        $needle = (string) $first_char;

        if (self::$SUPPORT['iconv'] === true) {
            $last_pos = \iconv_strrpos($haystack, $needle, $encoding);
        } else {
            $last_pos = self::strrpos($haystack, $needle, 0, $encoding);
        }

        if ($last_pos === false) {
            return false;
        }

        // Either the part in front of the last occurrence, or from it to the end.
        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
9449
9450
    /**
9451
     * Reverses characters order in the string.
9452
     *
9453
     * @param string $str      <p>The input string.</p>
9454
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
9455
     *
9456
     * @return string the string with characters in the reverse sequence
9457
     */
9458 10
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // The string is passed through emoji_encode() before reversing and
        // through emoji_decode() afterwards.
        $str = self::emoji_encode($str, true);
        $reversed = '';

        if ($encoding !== 'UTF-8') {
            // Other encodings: walk backwards with the encoding-aware wrappers.
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            for ($index = (int) self::strlen($str, $encoding) - 1; $index >= 0; --$index) {
                $unit = self::substr($str, $index, 1, $encoding);
                if ($unit !== false) {
                    $reversed .= $unit;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($index = (int) \grapheme_strlen($str) - 1; $index >= 0; --$index) {
                $unit = \grapheme_substr($str, $index, 1);
                if ($unit !== false) {
                    $reversed .= $unit;
                }
            }
        } else {
            // No intl: walk backwards one character at a time via mbstring.
            for ($index = (int) \mb_strlen($str) - 1; $index >= 0; --$index) {
                $unit = \mb_substr($str, $index, 1);
                if ($unit !== false) {
                    $reversed .= $unit;
                }
            }
        }

        return self::emoji_decode($reversed, true);
    }
9502
9503
    /**
     * Case-insensitive search for the last occurrence of a needle in a haystack,
     * returning the part of the haystack before or from that occurrence.
     *
     * With "mbstring" the call is delegated to mb_strrichr(). Without it, only the
     * first character of $needle is searched for (via UTF8::strripos()) and the
     * result is cut out with UTF8::substr().
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              <b>true</b>: return everything from the beginning of haystack
     *                              up to the last occurrence of needle.<br>
     *                              <b>false</b>: return everything from the last occurrence of
     *                              needle to the end of haystack.
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or<br>false if needle is not found
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // preferred implementation: mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        // vanilla php: search for the first character of the needle only
        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }

        $position = self::strripos($haystack, (string) $first_char, 0, $encoding);
        if ($position === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $position, $encoding)
            : self::substr($haystack, $position, null, $encoding);
    }
9578
9579
    /**
     * Case-insensitive lookup of the position of the last occurrence of a needle.
     *
     * Strategy, in order: mbstring, native strripos() for "CP850" / "ASCII",
     * grapheme_strripos() (UTF-8 and non-negative offset only), native strripos()
     * for pure ASCII input and finally case folding + UTF8::strrpos().
     * A warning is triggered if a non UTF-8 encoding is requested without mbstring.
     *
     * @param string     $haystack   <p>The string to look in.</p>
     * @param int|string $needle     <p>The string to look for. A non-negative int is converted via UTF8::chr().</p>
     * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }

        $needle = (string) $needle;
        if ($needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $is_utf8 = $encoding === 'UTF-8';

        // 1) mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $is_utf8
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        // 2) binary || ascii only
        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strripos($haystack, $needle, $offset);
        }

        if (!$is_utf8 && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // 3) intl: "grapheme_strripos()" can't handle other encodings or a negative offset
        if ($is_utf8 && $offset >= 0 && self::$SUPPORT['intl'] === true) {
            $grapheme_position = \grapheme_strripos($haystack, $needle, $offset);
            if ($grapheme_position !== false) {
                return $grapheme_position;
            }
        }

        // 4) ascii only
        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        // 5) vanilla php: fold the case of both strings and search case-sensitive
        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($folded_haystack, $folded_needle, $offset, $encoding, $clean_utf8);
    }
9689
9690
    /**
     * Byte-wise, case-insensitive lookup of the position of the last occurrence of a needle.
     *
     * Uses mb_strripos() with the 8-bit "CP850" charset when "mbstring.func_overload"
     * is active, otherwise the native strripos().
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @return false|int
     *                   <p>Return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found (or one of the strings is empty).</p>
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strripos($haystack, $needle, $offset, 'CP850')
            : \strripos($haystack, $needle, $offset);
    }
9722
9723
    /**
     * Look up the position of the last occurrence of a needle in a haystack.
     *
     * Strategy, in order: mbstring, native strrpos() for "CP850" / "ASCII",
     * grapheme_strrpos() (UTF-8 and non-negative offset only), native strrpos()
     * for pure ASCII input and finally a byte search whose result is converted
     * into a character position via UTF8::strlen().
     * A warning is triggered if a non UTF-8 encoding is requested without mbstring.
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack   <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                               into the string. Negative values will stop searching at an arbitrary point prior to
     *                               the end of the string.
     *                               </p>
     * @param string     $encoding   [optional] <p>Set the charset.</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }

        $needle = (string) $needle;
        if ($needle === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $is_utf8 = $encoding === 'UTF-8';

        // 1) mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $is_utf8
                ? \mb_strrpos($haystack, $needle, $offset)
                : \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        // 2) binary || ascii only
        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strrpos($haystack, $needle, $offset);
        }

        if (!$is_utf8 && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // 3) intl: "grapheme_strrpos()" can't handle other encodings or a negative offset
        if ($is_utf8 && $offset >= 0 && self::$SUPPORT['intl'] === true) {
            $grapheme_position = \grapheme_strrpos($haystack, $needle, $offset);
            if ($grapheme_position !== false) {
                return $grapheme_position;
            }
        }

        // 4) ascii only
        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        // 5) vanilla php: cut the haystack according to the offset ...
        if ($offset !== 0) {
            if ($offset > 0) {
                $sliced = self::substr($haystack, $offset);
            } else {
                $sliced = self::substr($haystack, 0, $offset);
                // the cut-off part was at the end, so positions are not shifted
                $offset = 0;
            }

            if ($sliced !== null) {
                $haystack = $sliced === false ? '' : (string) $sliced;
            }
        }

        // ... search byte-wise ...
        $byte_position = \strrpos($haystack, $needle);
        if ($byte_position === false) {
            return false;
        }

        /** @var false|string $prefix - needed for PhpStan (stubs error) */
        $prefix = \substr($haystack, 0, $byte_position);
        if ($prefix === false) {
            return false;
        }

        // ... and count the characters in front of the match
        return $offset + (int) self::strlen($prefix);
    }
9861
9862
    /**
     * Byte-wise lookup of the position of the last occurrence of a needle.
     *
     * Uses mb_strrpos() with the 8-bit "CP850" charset when "mbstring.func_overload"
     * is active, otherwise the native strrpos().
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *                         </p>
     *
     * @return false|int
     *                   <p>The numeric position of the last occurrence of needle in the
     *                   haystack string. If needle is not found (or one of the strings is empty), it returns false.</p>
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
            : \strrpos($haystack, $needle, $offset);
    }
9894
9895
    /**
     * Determine the length of the leading segment of a string that consists only of
     * characters contained in the given mask.
     *
     * If an offset and / or a length is given, the check is applied to that
     * sub-string only. An empty (sub-)string or an empty mask yields 0.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $mask     <p>The mask of chars</p>
     * @param int    $offset   [optional]
     * @param int    $length   [optional]
     * @param string $encoding [optional] <p>Set the charset.</p>
     *
     * @return false|int
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // reduce the input to the requested window first
        if ($offset !== 0 || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $str = (string) \mb_substr($str, $offset, $length);
            } else {
                $str = (string) \mb_substr($str, $offset);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        $found = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
9938
9939
    /**
     * Return the part of the haystack starting at (or, optionally, preceding) the
     * first occurrence of the needle.
     *
     * Strategy, in order: mbstring, native strstr() for "CP850" / "ASCII",
     * grapheme_strstr() (UTF-8 only), native strstr() for pure ASCII input and
     * finally a regular expression based search.
     * A warning is triggered if a non UTF-8 encoding is requested without mbstring.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, strstr() returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      A sub-string,<br>or <strong>false</strong> if needle is not found
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        $clean_utf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($clean_utf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $is_utf8 = $encoding === 'UTF-8';

        // 1) mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $is_utf8
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        // 2) binary || ascii only
        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        if (!$is_utf8 && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // 3) intl: "grapheme_strstr()" can't handle other encodings
        if ($is_utf8 && self::$SUPPORT['intl'] === true) {
            $grapheme_result = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($grapheme_result !== false) {
                return $grapheme_result;
            }
        }

        // 4) ascii only
        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        // 5) vanilla php: lazily capture everything in front of the first needle
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);
        if (!isset($match[1])) {
            return false;
        }

        $prefix = $match[1];
        if ($before_needle) {
            return $prefix;
        }

        return self::substr($haystack, (int) self::strlen($prefix));
    }
10047
10048
    /**
     * Byte-wise search for the first occurrence of a needle, returning the matching
     * part of the haystack.
     *
     * Uses mb_strstr() with the 8-bit "CP850" charset when "mbstring.func_overload"
     * is active, otherwise the native strstr().
     *
     * @param string $haystack      <p>
     *                              The string from which to get the first occurrence
     *                              of needle.
     *                              </p>
     * @param string $needle        <p>
     *                              The string to find in haystack.
     *                              </p>
     * @param bool   $before_needle [optional] <p>
     *                              <b>true</b>: return everything from the beginning of haystack
     *                              up to the first occurrence of needle.<br>
     *                              <b>false</b>: return everything from the first occurrence of
     *                              needle to the end of haystack.
     *                              </p>
     *
     * @return false|string
     *                      <p>The portion of haystack,
     *                      or false if needle is not found (or one of the strings is empty).</p>
     */
    public static function strstr_in_byte(
        string $haystack,
        string $needle,
        bool $before_needle = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
            : \strstr($haystack, $needle, $before_needle);
    }
10087
10088
    /**
     * Transform a string so that it can be used for case-less matching.
     *
     * The string is first run through the internal case-folding helper and is then
     * converted to lowercase or uppercase (depending on $lower).
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str        <p>The input string.</p>
     * @param bool        $full       [optional] <p>
     *                                <b>true</b>, replace full case folding chars (default)<br>
     *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                                </p>
     * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding   [optional] <p>Set the charset.</p>
     * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower      [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                                is for some languages better ...</p>
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $clean_utf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            // remove invalid byte sequences before the case conversion
            $str = self::clean($str);
        }

        $folded = self::fixStrCaseHelper($str, $lower, $full);

        // fast path: plain UTF-8 without language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return $lower === true ? \mb_strtolower($folded) : \mb_strtoupper($folded);
        }

        return $lower === true
            ? self::strtolower($folded, $encoding, false, $lang)
            : self::strtoupper($folded, $encoding, false, $lang);
    }
10140
10141
    /**
     * Convert all alphabetic characters of a string to lowercase.
     *
     * Without a language the conversion is done by mb_strtolower(). With a language
     * and "intl" the "<lang>-Lower" transliterator is used ("Any-Lower" plus a
     * warning if it is unknown); without "intl" a warning is triggered and
     * mb_strtolower() is used instead.
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                           <p>The string being lowercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $str = (string) $str;
        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            // remove invalid byte sequences before the case conversion
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length === true) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // fast path: plain UTF-8 without language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang === null) {
            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        if (self::$SUPPORT['intl'] !== true) {
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        // lazy-load the list of available transliterators
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $transliterator_id = $lang . '-Lower';
        if (!\in_array($transliterator_id, self::$INTL_TRANSLITERATOR_LIST, true)) {
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

            $transliterator_id = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        return (string) \transliterator_transliterate($transliterator_id, $str);
    }
10210
10211
    /**
     * Convert all alphabetic characters of a string to uppercase.
     *
     * Without a language the conversion is done by mb_strtoupper(). With a language
     * and "intl" the "<lang>-Upper" transliterator is used ("Any-Upper" plus a
     * warning if it is unknown); without "intl" a warning is triggered and
     * mb_strtoupper() is used instead.
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                           <p>The string being uppercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length
     *                                                   (the string is pre-processed by the internal case helper)</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            // remove invalid byte sequences before the case conversion
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length === true) {
            $str = self::fixStrCaseHelper($str, false);
        }

        // fast path: plain UTF-8 without language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // lazy-load the list of available transliterators
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Upper';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl is loaded here, the language itself is the problem
                    // (message aligned with UTF8::strtolower())
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    $language_code = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($language_code, $str);
            }

            // the warning must name this method, not strtolower()
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
10280
10281
    /**
     * Translate characters or replace sub-strings.
     *
     * @see http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The characters (string) or the sub-strings (array) to search for. If "$to" is
     *                              empty, a string is removed from "$str" and an array is used as a
     *                              "search => replace" map.</p>
     * @param string|string[] $to   [optional] <p>The characters (string) or the sub-strings (array) used as
     *                              replacement. The longer one of "$from" / "$to" is truncated to the length of the
     *                              shorter one.</p>
     *
     * @throws \InvalidArgumentException if "$from" and "$to" cannot be combined into a replacement map
     *
     * @return string
     *                <p>This function returns a copy of str, translating all occurrences of each character in "from" to the
     *                corresponding character in "to".</p>
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        if ($from === $to) {
            return $str;
        }

        // without "$to", "$from" is used as-is: a string to remove or a ready-made map
        $map = $from;

        if ($to !== '') {
            // only strings are split into single characters, arrays already are lists of (sub-)strings
            $from_list = \is_array($from) ? $from : self::str_split($from);
            $to_list = \is_array($to) ? $to : self::str_split($to);

            $count_from = \count($from_list);
            $count_to = \count($to_list);

            // "array_combine()" needs the same number of keys and values
            if ($count_from > $count_to) {
                $from_list = \array_slice($from_list, 0, $count_to);
            } elseif ($count_from < $count_to) {
                $to_list = \array_slice($to_list, 0, $count_from);
            }

            $map = \array_combine($from_list, $to_list);
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            if ($map === false) {
                // report the lists that were actually combined, not the failed result
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from_list, true) . ' | to: ' . \print_r($to_list, true) . ')');
            }
        }

        // only reachable with an empty "$to": remove the given string
        if (\is_string($map)) {
            return \str_replace($map, '', $str);
        }

        return \strtr($str, $map);
    }
10329
10330
    /**
     * Return the display width of a string: characters matched by the "wide" ranges below count as two columns in
     * the fallback implementation, everything else as one.
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return int
     */
    public static function strwidth(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): int {
        if ($str === '') {
            return 0;
        }

        // "UTF-8" and "CP850" are used as-is, everything else gets normalized
        $needs_normalizing = !($encoding === 'UTF-8' || $encoding === 'CP850');
        if ($needs_normalizing) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            $str = self::clean($str);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        //
        // fallback via vanilla php
        //

        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip the wide characters and remember how many were removed
        $wide_count = 0;
        $narrow_only = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wide_count);

        $narrow_count = (int) self::strlen($narrow_only, 'UTF-8');

        return ($wide_count * 2) + $narrow_count;
    }
10383
10384
    /**
     * Get part of a string.
     *
     * The first available backend is used, in this order: mbstring, plain "substr()" for "CP850" / "ASCII",
     * "grapheme_substr()" (intl), "iconv_substr()", plain "substr()" for ASCII-only input and finally a
     * "split into characters + array_slice()" fallback.
     *
     * @see http://php.net/manual/en/function.mb-substr.php
     *
     * @param string $str        <p>The string being checked.</p>
     * @param int    $offset     <p>The first position used in str.</p>
     * @param int    $length     [optional] <p>The maximum length of the returned string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters.</p><p><b>FALSE</b> is returned if the string length can not be
     *                      determined (e.g. invalid chars without mbstring) or if the used backend returns
     *                      <b>FALSE</b>.</p>
     */
    public static function substr(
        string $str,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        if ($clean_utf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    return \mb_substr($str, $offset);
                }

                return \mb_substr($str, $offset, $length);
            }

            // INFO: do not call "self::substr()" here, the same arguments would end up in this
            // branch again (endless recursion); "mb_substr()" accepts NULL as "read until the end"
            return \mb_substr($str, $offset, $length, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            if ($length === null) {
                return \substr($str, $offset);
            }

            return \substr($str, $offset, $length);
        }

        // otherwise we need the string-length
        $str_length = 0;
        if ($offset || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // empty string ("$length" can not be 0 here, so only NULL is left as "no length given")
        if ($offset === $str_length && $length === null) {
            return '';
        }

        // impossible
        if ($offset && $offset > $str_length) {
            return '';
        }

        if ($length === null) {
            $length = (int) $str_length;
        } else {
            $length = (int) $length;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_substr($str, $offset, $length);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $return_tmp = \iconv_substr($str, $offset, $length);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split to array, and remove invalid characters
        $array = self::str_split($str);

        // extract relevant part, and join to make string again
        return \implode('', \array_slice($array, $offset, $length));
    }
10544
10545
    /**
     * Binary-safe comparison of two strings from an offset, up to a length of characters.
     *
     * If an offset or a length is given, "$str1" is cut to that part first and "$str2" is cut to the
     * (character) length of the remaining "$str1" before both are compared.
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
     *                                     of the length of the str compared to the length of main_str less the
     *                                     offset.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);
                // measure "$str1" in the same encoding that is used to cut both strings
                $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1, $encoding), $encoding);
            }
        }

        if ($case_insensitivity === true) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
10598
10599
    /**
     * Count the number of substring occurrences.
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string $haystack   <p>The string to search in.</p>
     * @param string $needle     <p>The substring to search for.</p>
     * @param int    $offset     [optional] <p>The offset where to start counting.</p>
     * @param int    $length     [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring.
     *                           </p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   <p>The number of occurrences, or <b>FALSE</b> if the haystack or the needle is empty or if
     *                   the length of the haystack can not be determined.</p>
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // nothing to search in / nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // an empty search window can not contain anything
        if ($length === 0) {
            return 0;
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8 === true) {
            // invalid characters would shift the positions reported by "mb_*" / "iconv_*"
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        $is_utf8 = $encoding === 'UTF-8';

        // reduce the haystack to the requested window
        if ($offset || $length > 0) {
            if ($length === null) {
                $haystack_length = self::strlen($haystack, $encoding);
                if ($haystack_length === false) {
                    return false;
                }
                $length = (int) $haystack_length;
            }

            $haystack = $is_utf8
                ? (string) \mb_substr($haystack, $offset, $length)
                : (string) \mb_substr($haystack, $offset, $length, $encoding);
        }

        $has_mbstring = self::$SUPPORT['mbstring'];

        if (!$is_utf8 && $has_mbstring === false) {
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if ($has_mbstring === true) {
            return $is_utf8
                ? \mb_substr_count($haystack, $needle)
                : \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback via vanilla php: one entry per match
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $hits, \PREG_SET_ORDER);

        return \count($hits);
    }
10680
10681
    /**
     * Count the number of substring occurrences, working on bytes.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string being found.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The offset where to start counting
     *                         </p>
     * @param int    $length   [optional] <p>
     *                         The maximum length after the specified offset to search for the
     *                         substring.
     *                         </p>
     *
     * @return false|int the number of times the
     *                   needle substring occurs in the
     *                   haystack string
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($needle === '' || $haystack === '') {
            return 0;
        }

        $is_overloaded = self::$SUPPORT['mbstring_func_overload'] === true;

        if ($is_overloaded) {
            // with function overloading the window is cut out first
            if ($offset || $length !== null) {
                if ($length === null) {
                    $haystack_length = self::strlen($haystack);
                    if ($haystack_length === false) {
                        return false;
                    }
                    $length = (int) $haystack_length;
                }

                $both_given = $length !== 0 && $offset !== 0;
                if (
                    $both_given
                    &&
                    ($length + $offset) <= 0
                    &&
                    Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
                ) {
                    return false;
                }

                /** @var false|string $window - needed for PhpStan (stubs error) */
                $window = \substr($haystack, $offset, $length);
                $haystack = $window === false ? '' : (string) $window;
            }

            // "mb_" is available if overload is used, so use it ...
            return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
        }

        return $length === null
            ? \substr_count($haystack, $needle, $offset)
            : \substr_count($haystack, $needle, $offset, $length);
    }
10759
10760
    /**
     * Returns the number of occurrences of $substring in the given string.
     * The comparison is case-sensitive unless $case_sensitive is set to false.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $substring      <p>The substring to search for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $case_sensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($substring === '' || $str === '') {
            return 0;
        }

        $subject = $str;
        $search = $substring;

        // fast path: UTF-8, compared in upper case if the case does not matter
        if ($encoding === 'UTF-8') {
            if (!$case_sensitive) {
                $subject = \mb_strtoupper($subject);
                $search = \mb_strtoupper($search);
            }

            return (int) \mb_substr_count($subject, $search);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // other encodings: compared via case folding if the case does not matter
        if (!$case_sensitive) {
            $subject = self::strtocasefold($subject, true, false, $encoding, null, false);
            $search = self::strtocasefold($search, true, false, $encoding, null, false);
        }

        return (int) \mb_substr_count($subject, $search, $encoding);
    }
10805
10806
    /**
     * Strip $needle from the start of $haystack if the haystack starts with it (case-insensitive check).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string the haystack without the prefix, or the unchanged haystack if it does not start with the needle
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // an empty haystack stays empty, an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_istarts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
10830
10831
    /**
     * Get part of a string process in bytes.
     *
     * @param string $str    <p>The string being checked.</p>
     * @param int    $offset <p>The first position used in str.</p>
     * @param int    $length [optional] <p>The maximum length of the returned string, NULL means "until the end".</p>
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters, or whatever the underlying "substr()" / "mb_substr()" call
     *                      returns for an out-of-range <i>offset</i> (may be <b>FALSE</b>).</p>
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        // no length given: read until the end instead of using a fixed upper limit,
        // which would cut off everything after 2147483647 bytes
        if ($length === null) {
            return \substr($str, $offset);
        }

        return \substr($str, $offset, $length);
    }
10862
10863
    /**
     * Strip $needle from the end of $haystack if the haystack ends with it (case-insensitive check).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string the haystack without the suffix, or the unchanged haystack if it does not end with the needle
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // an empty haystack stays empty, an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_iends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // keep everything in front of the suffix
        $keep_length = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $keep_length);
    }
10887
10888
    /**
     * Strip $needle from the start of $haystack if the haystack starts with it.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string the haystack without the prefix, or the unchanged haystack if it does not start with the needle
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // an empty haystack stays empty, an empty needle removes nothing
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_starts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
10912
10913
    /**
     * Replace text within a portion of a string.
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * For an array of strings every element is processed on its own (with the given "$encoding"): a scalar
     * replacement / offset / length is applied to every element, arrays are matched by position. If "$length" is
     * not given for an array of strings, a length of 0 is used for every element.
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given for a single string, then it will
     *                                     default to strlen( string ); i.e. end the replacing at the end of string.
     *                                     Of course, if length is zero then this function will have the effect of
     *                                     inserting replacement into string at the given start offset.</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \InvalidArgumentException if "$offset" or "$length" is an array while "$str" is a string
     *
     * @return string|string[] The result string is returned. If string is an array then array is returned.
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str) === true) {
            $num = \count($str);

            // the replacement
            if (\is_array($replacement) === true) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset (non-integer entries are replaced by 0)
            if (\is_array($offset) === true) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$value_tmp) {
                    $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
                }
                unset($value_tmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length (non-integer entries are replaced by the number of strings)
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length) === true) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$value_tmp_V2) {
                    $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
                }
                unset($value_tmp_V2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call, the closure forwards the encoding which would get lost with a plain callable
            return \array_map(
                static function ($str_tmp, $replacement_tmp, $offset_tmp, $length_tmp) use ($encoding) {
                    return self::substr_replace($str_tmp, $replacement_tmp, $offset_tmp, $length_tmp, $encoding);
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        // a single string only uses the first replacement
        if (\is_array($replacement) === true) {
            if ($replacement !== []) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length) === true) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset) === true) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, string length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // a negative length counts from the end, a missing / too big length means "until the end"
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // part before the offset + replacement + part after the replaced portion
            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        \preg_match_all('/./us', $str, $str_matches);
        \preg_match_all('/./us', $replacement, $replacement_matches);

        if ($length === null) {
            $length_tmp = self::strlen($str, $encoding);
            if ($length_tmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $length_tmp;
        }

        // replace the characters in the list and join them again
        \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);

        return \implode('', $str_matches[0]);
    }
11063
11064
    /**
11065
     * Removes a suffix ($needle) from the end of the string ($haystack).
11066
     *
11067
     * @param string $haystack <p>The string to search in.</p>
11068
     * @param string $needle   <p>The substring to search for.</p>
11069
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
11070
     *
11071
     * @return string return the sub-string
11072
     */
11073 2
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // nothing can be removed from an empty haystack
        if ($haystack === '') {
            return '';
        }

        // an empty needle, or a haystack that does not end (byte-wise) with the needle,
        // leaves the haystack untouched
        if ($needle === '' || \substr($haystack, -\strlen($needle)) !== $needle) {
            return $haystack;
        }

        // the suffix matches: keep everything in front of it, counted in characters
        if ($encoding === 'UTF-8') {
            $keep_length = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keep_length);
        }

        $keep_length = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $keep_length, $encoding);
    }
11105
11106
    /**
11107
     * Returns a case swapped version of the string.
11108
     *
11109
     * @param string $str        <p>The input string.</p>
11110
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
11111
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
11112
     *
11113
     * @return string each character's case swapped
11114
     */
11115 6
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            // remove non UTF-8 chars first, so that the case conversion works on clean input
            $str = self::clean($str);
        }

        $is_utf8 = $encoding === 'UTF-8';

        if ($is_utf8) {
            $lower = \mb_strtolower($str);
            $upper = \mb_strtoupper($str);
        } else {
            $lower = self::strtolower($str, $encoding);
            $upper = self::strtoupper($str, $encoding);
        }

        // Fast path: at every byte position two of the three strings agree and cancel
        // each other out, so the XOR leaves the case-swapped byte. This only holds while
        // all three strings have the same byte length; string XOR truncates to the
        // shortest operand and a length change misaligns every following byte.
        $byte_length = \strlen($str);
        if (\strlen($lower) === $byte_length && \strlen($upper) === $byte_length) {
            return (string) ($lower ^ $upper ^ $str);
        }

        // Slow path: a case mapping changed the byte length (e.g. U+023A <-> U+2C65),
        // so every character is swapped on its own.
        $chars = [];
        if ($is_utf8) {
            if (\preg_match_all('/./us', $str, $matches) > 0) {
                $chars = $matches[0];
            }
        } else {
            $char_count = (int) self::strlen($str, $encoding);
            for ($i = 0; $i < $char_count; ++$i) {
                $chars[] = (string) self::substr($str, $i, 1, $encoding);
            }
        }

        if ($chars === []) {
            // the string could not be split (e.g. invalid byte sequences): keep the previous behaviour
            return (string) ($lower ^ $upper ^ $str);
        }

        $swapped = '';
        foreach ($chars as $char) {
            $char_lower = $is_utf8 ? \mb_strtolower($char) : self::strtolower($char, $encoding);
            if ($char_lower !== $char) {
                // the character was not lowercase -> use the lowercase form
                $swapped .= $char_lower;

                continue;
            }

            // already lowercase (or caseless) -> use the uppercase form
            $swapped .= $is_utf8 ? \mb_strtoupper($char) : self::strtoupper($char, $encoding);
        }

        return $swapped;
    }
11133
11134
    /**
11135
     * Checks whether symfony-polyfills are used.
11136
     *
11137
     * @return bool
11138
     *              <strong>true</strong> if in use, <strong>false</strong> otherwise
11139
     */
11140
    public static function symfony_polyfill_used(): bool
    {
        // a polyfill is in use when the function exists although the native extension is not loaded
        $mbstring_polyfilled = !\extension_loaded('mbstring') && \function_exists('mb_strlen');
        $iconv_polyfilled = !\extension_loaded('iconv') && \function_exists('iconv');

        return $mbstring_polyfilled || $iconv_polyfilled;
    }
11157
11158
    /**
11159
     * @param string $str
11160
     * @param int    $tab_length
11161
     *
11162
     * @return string
11163
     */
11164 6
    public static function tabs_to_spaces(string $str, int $tab_length = 4): string
    {
        // every tab becomes a run of $tab_length spaces
        $indent = \str_repeat(' ', $tab_length);

        return \str_replace("\t", $indent, $str);
    }
11176
11177
    /**
11178
     * Converts the first character of each word in the string to uppercase
11179
     * and all other chars to lowercase.
11180
     *
11181
     * @param string      $str                           <p>The input string.</p>
11182
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
11183
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
11184
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
11185
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
11186
     *
11187
     * @return string
11188
     *                <p>A string with all characters of $str being title-cased.</p>
11189
     */
11190 5
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8 === true) {
            // remove non UTF-8 chars before any case conversion is done
            $str = self::clean($str);
        }

        // a language hint or the "keep the string length" flag is handled by "str_titleize()"
        if ($lang !== null || $try_to_keep_the_string_length !== false) {
            return self::str_titleize($str, null, $encoding, false, $lang, $try_to_keep_the_string_length, false);
        }

        if ($encoding !== 'UTF-8') {
            $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_convert_case($str, \MB_CASE_TITLE, $normalized_encoding);
        }

        return \mb_convert_case($str, \MB_CASE_TITLE);
    }
11223
11224
    /**
11225
     * alias for "UTF8::to_ascii()"
11226
     *
11227
     * @param string $str
11228
     * @param string $subst_chr
11229
     * @param bool   $strict
11230
     *
11231
     * @return string
11232
     *
11233
     * @see UTF8::to_ascii()
11234
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
11235
     */
11236 7
    public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
    {
        // deprecated alias: all arguments are forwarded unchanged
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
11243
11244
    /**
11245
     * alias for "UTF8::to_iso8859()"
11246
     *
11247
     * @param string|string[] $str
11248
     *
11249
     * @return string|string[]
11250
     *
11251
     * @see UTF8::to_iso8859()
11252
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
11253
     */
11254 2
    public static function toIso8859($str)
    {
        // deprecated alias: the input (string or array) is forwarded unchanged
        $converted = self::to_iso8859($str);

        return $converted;
    }
11258
11259
    /**
11260
     * alias for "UTF8::to_latin1()"
11261
     *
11262
     * @param string|string[] $str
11263
     *
11264
     * @return string|string[]
11265
     *
11266
     * @see UTF8::to_iso8859()
11267
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
11268
     */
11269 2
    public static function toLatin1($str)
    {
        // deprecated alias: the conversion itself is done by "to_iso8859()"
        $converted = self::to_iso8859($str);

        return $converted;
    }
11273
11274
    /**
11275
     * alias for "UTF8::to_utf8()"
11276
     *
11277
     * @param string|string[] $str
11278
     *
11279
     * @return string|string[]
11280
     *
11281
     * @see UTF8::to_utf8()
11282
     * @deprecated <p>please use "UTF8::to_utf8()"</p>
11283
     */
11284 2
    public static function toUTF8($str)
    {
        // deprecated alias: the input (string or array) is forwarded unchanged
        $converted = self::to_utf8($str);

        return $converted;
    }
11288
11289
    /**
11290
     * Convert a string into ASCII.
11291
     *
11292
     * @param string $str     <p>The input string.</p>
11293
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
11294
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
11295
     *                        performance</p>
11296
     *
11297
     * @return string
11298
     */
11299 37
    public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
    {
        // the transliteration is delegated to the ASCII helper class
        $ascii = ASCII::to_transliterate($str, $unknown, $strict);

        return $ascii;
    }
11306
11307
    /**
11308
     * @param mixed $str
11309
     *
11310
     * @return bool
11311
     */
11312 19
    public static function to_boolean($str): bool
    {
        $value = (string) $str;
        if ($value === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $true_keywords = ['true', '1', 'on', 'yes'];
        $false_keywords = ['false', '0', 'off', 'no'];

        // all keywords are lowercase, so one case-folded lookup also covers the exact match
        $keyword = \strtolower($value);
        if (\in_array($keyword, $true_keywords, true)) {
            return true;
        }
        if (\in_array($keyword, $false_keywords, true)) {
            return false;
        }

        // any other numeric string is true only if it is greater than zero
        if (\is_numeric($value)) {
            return (float) $value > 0;
        }

        // everything else: false only if nothing (or just "0") is left after trimming
        return (bool) \trim($value);
    }
11348
11349
    /**
11350
     * Convert given string to safe filename (and keep string case).
11351
     *
11352
     * @param string $str
11353
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
11354
     *                                  simply replaced with hyphen.
11355
     * @param string $fallback_char
11356
     *
11357
     * @return string
11358
     */
11359 1
    public static function to_filename(
        string $str,
        bool $use_transliterate = false,
        string $fallback_char = '-'
    ): string {
        // the filename conversion is delegated to the ASCII helper class
        $filename = ASCII::to_filename($str, $use_transliterate, $fallback_char);

        return $filename;
    }
11370
11371
    /**
11372
     * Convert a string into "ISO-8859"-encoding (Latin-1).
11373
     *
11374
     * @param string|string[] $str
11375
     *
11376
     * @return string|string[]
11377
     */
11378 8
    public static function to_iso8859($str)
    {
        if (\is_array($str) === true) {
            // convert every element (recursively) and keep the keys
            foreach (\array_keys($str) as $key) {
                $str[$key] = self::to_iso8859($str[$key]);
            }

            return $str;
        }

        $str = (string) $str;

        // an empty string needs no decoding
        return $str === '' ? '' : self::utf8_decode($str);
    }
11395
11396
    /**
11397
     * alias for "UTF8::to_iso8859()"
11398
     *
11399
     * @param string|string[] $str
11400
     *
11401
     * @return string|string[]
11402
     *
11403
     * @see UTF8::to_iso8859()
11404
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
11405
     */
11406 2
    public static function to_latin1($str)
    {
        // deprecated alias: the conversion itself is done by "to_iso8859()"
        $converted = self::to_iso8859($str);

        return $converted;
    }
11410
11411
    /**
11412
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
11413
     *
11414
     * <ul>
11415
     * <li>It decodes UTF-8 codepoints and Unicode escape sequences.</li>
11416
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
11417
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
11418
     * case.</li>
11419
     * </ul>
11420
     *
11421
     * @param string|string[] $str                        <p>Any string or array.</p>
11422
     * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
11423
     *
11424
     * @return string|string[] the UTF-8 encoded string
11425
     */
11426 42
    public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
    {
        if (\is_array($str) === true) {
            // convert every element (recursively) and keep the keys
            foreach (\array_keys($str) as $key) {
                $str[$key] = self::to_utf8($str[$key], $decode_html_entity_to_utf8);
            }

            return $str;
        }

        $str = (string) $str;
        if ($str === '') {
            return $str;
        }

        $byte_count = \strlen($str);
        $result = '';

        for ($pos = 0; $pos < $byte_count; ++$pos) {
            $byte = $str[$pos];
            $ord = \ord($byte);

            // 7-bit bytes do not need a conversion
            if ($ord < 0x80) {
                $result .= $byte;

                continue;
            }

            // number of continuation bytes a UTF-8 lead byte of this value announces
            if ($ord >= 0xC0 && $ord <= 0xDF) {
                $tail_length = 1; // looks like 2 bytes UTF8
            } elseif ($ord >= 0xE0 && $ord <= 0xEF) {
                $tail_length = 2; // looks like 3 bytes UTF8
            } elseif ($ord >= 0xF0 && $ord <= 0xF7) {
                $tail_length = 3; // looks like 4 bytes UTF8
            } else {
                $tail_length = 0; // 0x80-0xBF or 0xF8-0xFF: doesn't look like UTF8
            }

            // the sequence is accepted only if every announced continuation byte
            // exists and lies within 0x80-0xBF
            $looks_like_utf8 = $tail_length > 0 && ($pos + $tail_length) < $byte_count;
            for ($offset = 1; $looks_like_utf8 && $offset <= $tail_length; ++$offset) {
                $looks_like_utf8 = (\ord($str[$pos + $offset]) & 0xC0) === 0x80;
            }

            if ($looks_like_utf8) {
                // yeah, almost sure it's UTF8 already: copy the whole sequence and skip its tail
                $result .= \substr($str, $pos, $tail_length + 1);
                $pos += $tail_length;
            } else {
                // not valid UTF8 - convert this single byte
                $result .= self::to_utf8_convert_helper($byte);
            }
        }

        // decode unicode escape sequences + unicode surrogate pairs
        $result = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // single "\uXXXX" escape
                    $code_point = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair: http://unicode.org/faq/utf_bom.html#utf16-4
                    $code_point = ((int) \hexdec($matches[1]) << 10)
                                  + (int) \hexdec($matches[2])
                                  + 0x10000
                                  - (0xD800 << 10)
                                  - 0xDC00;
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($code_point < 0x80) {
                    return (string) self::chr($code_point);
                }

                if ($code_point < 0xA0) {
                    /** @noinspection UnnecessaryCastingInspection */
                    return (string) self::chr(0xC0 | ($code_point >> 6)) . (string) self::chr(0x80 | ($code_point & 0x3F));
                }

                return self::decimal_to_chr($code_point);
            },
            $result
        );

        // "preg_replace_callback()" returns null on error
        if ($result === null) {
            return '';
        }

        // decode UTF-8 codepoints
        if ($decode_html_entity_to_utf8 === true) {
            $result = self::html_entity_decode($result);
        }

        return $result;
    }
11544
11545
    /**
11546
     * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
11547
     *
11548
     * INFO: This is slower than "trim()"
11549
     *
11550
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
11551
     * but the check for ASCII (7-Bit) costs more time than we can save here.
11552
     *
11553
     * @param string      $str   <p>The string to be trimmed</p>
11554
     * @param string|null $chars [optional] <p>Optional characters to be stripped</p>
11555
     *
11556
     * @return string the trimmed string
11557
     */
11558 56
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // A plain truthiness check would treat the char list "0" as "not given" and trim
        // whitespace instead of zeros, so the value is compared explicitly.
        // An empty char list still falls back to whitespace trimming, as before.
        $has_chars = $chars !== null && $chars !== '';

        if (self::$SUPPORT['mbstring'] === true) {
            if ($has_chars) {
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                $pattern = "^[{$chars}]+|[{$chars}]+\$";
            } else {
                $pattern = '^[\\s]+|[\\s]+$';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // fallback without mbstring: the pattern is wrapped in "/" delimiters by "regex_replace()"
        if ($has_chars) {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
11586
11587
    /**
11588
     * Makes string's first char uppercase.
11589
     *
11590
     * @param string      $str                           <p>The input string.</p>
11591
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
11592
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
11593
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
11594
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
11595
     *
11596
     * @return string the resulting string
11597
     */
11598 69
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8 === true) {
            // remove non UTF-8 chars before the string is split
            $str = self::clean($str);
        }

        // without a language hint and without the "keep length" flag the plain mb_* functions are enough
        $plain_mb = $lang === null && $try_to_keep_the_string_length === false;

        // decided before the encoding is normalized, so only the literal 'UTF-8' takes the short path
        $is_utf8 = $encoding === 'UTF-8';

        // split the string into its first character and the rest
        if ($is_utf8) {
            $rest = (string) \mb_substr($str, 1);
            $first = (string) \mb_substr($str, 0, 1);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $rest = (string) self::substr($str, 1, null, $encoding);
            $first = $plain_mb
                ? (string) \mb_substr($str, 0, 1, $encoding)
                : (string) self::substr($str, 0, 1, $encoding);
        }

        // uppercase only the first character
        if ($plain_mb === false) {
            $first = self::strtoupper($first, $encoding, false, $lang, $try_to_keep_the_string_length);
        } elseif ($is_utf8) {
            $first = \mb_strtoupper($first);
        } else {
            $first = \mb_strtoupper($first, $encoding);
        }

        return $first . $rest;
    }
11656
11657
    /**
11658
     * alias for "UTF8::ucfirst()"
11659
     *
11660
     * @param string $str
11661
     * @param string $encoding
11662
     * @param bool   $clean_utf8
11663
     *
11664
     * @return string
11665
     *
11666
     * @see UTF8::ucfirst()
11667
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
11668
     */
11669 1
    public static function ucword(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        // deprecated alias: all arguments are forwarded unchanged
        $result = self::ucfirst($str, $encoding, $clean_utf8);

        return $result;
    }
11676
11677
    /**
11678
     * Uppercase for all words in the string.
11679
     *
11680
     * @param string   $str        <p>The input string.</p>
11681
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
11682
     * @param string   $char_list  [optional] <p>Additional chars that contains to words and do not start a new
11683
     *                             word.</p>
11684
     * @param string   $encoding   [optional] <p>Set the charset.</p>
11685
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
11686
     *
11687
     * @return string
11688
     */
11689 8
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // compare explicitly: a truthiness check would also swallow the valid input "0"
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($clean_utf8 === true) {
            // remove non UTF-8 chars before the string is split into words
            $str = self::clean($str);
        }

        // the native function is only used if neither extra word-chars nor exceptions are given
        $use_php_default_functions = !(bool) ($char_list . \implode('', $exceptions));

        if (
            $use_php_default_functions === true
            &&
            ASCII::is_ascii($str) === true
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        foreach ($words as $word) {
            // skip empty parts only; a truthiness check would also drop the word "0"
            if ((string) $word === '') {
                continue;
            }

            if (
                $use_exceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words_str .= self::ucfirst($word, $encoding);
            } else {
                // words listed in $exceptions are appended unchanged
                $words_str .= $word;
            }
        }

        return $words_str;
    }
11741
11742
    /**
11743
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
11744
     *
11745
     * e.g:
11746
     * 'test+test'                     => 'test test'
11747
     * 'D&#252;sseldorf'               => 'Düsseldorf'
11748
     * 'D%FCsseldorf'                  => 'Düsseldorf'
11749
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
11750
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
11751
     * 'Düsseldorf'                   => 'Düsseldorf'
11752
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
11753
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
11754
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
11755
     *
11756
     * @param string $str          <p>The input string.</p>
11757
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
11758
     *
11759
     * @return string
11760
     */
11761 4
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // without any of these markers there is nothing to decode
        $needs_decoding = \strpos($str, '&') !== false
                          || \strpos($str, '%') !== false
                          || \strpos($str, '+') !== false
                          || \strpos($str, '\u') !== false;

        if ($needs_decoding === false) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // one decoding pass; repeated until the string stops changing if $multi_decode is set
        do {
            $before_pass = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $entities_decoded = self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            );

            $str = self::fix_simple_utf8(\urldecode($entities_decoded));
        } while ($multi_decode === true && $before_pass !== $str);

        return $str;
    }
11813
11814
    /**
11815
     * Returns an array with "urlencoded"-win1252 -> UTF-8
11816
     *
11817
     * @return string[]
11818
     *
11819
     * @deprecated <p>please use the "UTF8::urldecode()" function to decode a string</p>
11820
     */
11821 2
    public static function urldecode_fix_win1252_chars(): array
11822
    {
11823
        return [
11824 2
            '%20' => ' ',
11825
            '%21' => '!',
11826
            '%22' => '"',
11827
            '%23' => '#',
11828
            '%24' => '$',
11829
            '%25' => '%',
11830
            '%26' => '&',
11831
            '%27' => "'",
11832
            '%28' => '(',
11833
            '%29' => ')',
11834
            '%2A' => '*',
11835
            '%2B' => '+',
11836
            '%2C' => ',',
11837
            '%2D' => '-',
11838
            '%2E' => '.',
11839
            '%2F' => '/',
11840
            '%30' => '0',
11841
            '%31' => '1',
11842
            '%32' => '2',
11843
            '%33' => '3',
11844
            '%34' => '4',
11845
            '%35' => '5',
11846
            '%36' => '6',
11847
            '%37' => '7',
11848
            '%38' => '8',
11849
            '%39' => '9',
11850
            '%3A' => ':',
11851
            '%3B' => ';',
11852
            '%3C' => '<',
11853
            '%3D' => '=',
11854
            '%3E' => '>',
11855
            '%3F' => '?',
11856
            '%40' => '@',
11857
            '%41' => 'A',
11858
            '%42' => 'B',
11859
            '%43' => 'C',
11860
            '%44' => 'D',
11861
            '%45' => 'E',
11862
            '%46' => 'F',
11863
            '%47' => 'G',
11864
            '%48' => 'H',
11865
            '%49' => 'I',
11866
            '%4A' => 'J',
11867
            '%4B' => 'K',
11868
            '%4C' => 'L',
11869
            '%4D' => 'M',
11870
            '%4E' => 'N',
11871
            '%4F' => 'O',
11872
            '%50' => 'P',
11873
            '%51' => 'Q',
11874
            '%52' => 'R',
11875
            '%53' => 'S',
11876
            '%54' => 'T',
11877
            '%55' => 'U',
11878
            '%56' => 'V',
11879
            '%57' => 'W',
11880
            '%58' => 'X',
11881
            '%59' => 'Y',
11882
            '%5A' => 'Z',
11883
            '%5B' => '[',
11884
            '%5C' => '\\',
11885
            '%5D' => ']',
11886
            '%5E' => '^',
11887
            '%5F' => '_',
11888
            '%60' => '`',
11889
            '%61' => 'a',
11890
            '%62' => 'b',
11891
            '%63' => 'c',
11892
            '%64' => 'd',
11893
            '%65' => 'e',
11894
            '%66' => 'f',
11895
            '%67' => 'g',
11896
            '%68' => 'h',
11897
            '%69' => 'i',
11898
            '%6A' => 'j',
11899
            '%6B' => 'k',
11900
            '%6C' => 'l',
11901
            '%6D' => 'm',
11902
            '%6E' => 'n',
11903
            '%6F' => 'o',
11904
            '%70' => 'p',
11905
            '%71' => 'q',
11906
            '%72' => 'r',
11907
            '%73' => 's',
11908
            '%74' => 't',
11909
            '%75' => 'u',
11910
            '%76' => 'v',
11911
            '%77' => 'w',
11912
            '%78' => 'x',
11913
            '%79' => 'y',
11914
            '%7A' => 'z',
11915
            '%7B' => '{',
11916
            '%7C' => '|',
11917
            '%7D' => '}',
11918
            '%7E' => '~',
11919
            '%7F' => '',
11920
            '%80' => '`',
11921
            '%81' => '',
11922
            '%82' => '‚',
11923
            '%83' => 'ƒ',
11924
            '%84' => '„',
11925
            '%85' => '…',
11926
            '%86' => '†',
11927
            '%87' => '‡',
11928
            '%88' => 'ˆ',
11929
            '%89' => '‰',
11930
            '%8A' => 'Š',
11931
            '%8B' => '‹',
11932
            '%8C' => 'Œ',
11933
            '%8D' => '',
11934
            '%8E' => 'Ž',
11935
            '%8F' => '',
11936
            '%90' => '',
11937
            '%91' => '‘',
11938
            '%92' => '’',
11939
            '%93' => '“',
11940
            '%94' => '”',
11941
            '%95' => '•',
11942
            '%96' => '–',
11943
            '%97' => '—',
11944
            '%98' => '˜',
11945
            '%99' => '™',
11946
            '%9A' => 'š',
11947
            '%9B' => '›',
11948
            '%9C' => 'œ',
11949
            '%9D' => '',
11950
            '%9E' => 'ž',
11951
            '%9F' => 'Ÿ',
11952
            '%A0' => '',
11953
            '%A1' => '¡',
11954
            '%A2' => '¢',
11955
            '%A3' => '£',
11956
            '%A4' => '¤',
11957
            '%A5' => '¥',
11958
            '%A6' => '¦',
11959
            '%A7' => '§',
11960
            '%A8' => '¨',
11961
            '%A9' => '©',
11962
            '%AA' => 'ª',
11963
            '%AB' => '«',
11964
            '%AC' => '¬',
11965
            '%AD' => '',
11966
            '%AE' => '®',
11967
            '%AF' => '¯',
11968
            '%B0' => '°',
11969
            '%B1' => '±',
11970
            '%B2' => '²',
11971
            '%B3' => '³',
11972
            '%B4' => '´',
11973
            '%B5' => 'µ',
11974
            '%B6' => '¶',
11975
            '%B7' => '·',
11976
            '%B8' => '¸',
11977
            '%B9' => '¹',
11978
            '%BA' => 'º',
11979
            '%BB' => '»',
11980
            '%BC' => '¼',
11981
            '%BD' => '½',
11982
            '%BE' => '¾',
11983
            '%BF' => '¿',
11984
            '%C0' => 'À',
11985
            '%C1' => 'Á',
11986
            '%C2' => 'Â',
11987
            '%C3' => 'Ã',
11988
            '%C4' => 'Ä',
11989
            '%C5' => 'Å',
11990
            '%C6' => 'Æ',
11991
            '%C7' => 'Ç',
11992
            '%C8' => 'È',
11993
            '%C9' => 'É',
11994
            '%CA' => 'Ê',
11995
            '%CB' => 'Ë',
11996
            '%CC' => 'Ì',
11997
            '%CD' => 'Í',
11998
            '%CE' => 'Î',
11999
            '%CF' => 'Ï',
12000
            '%D0' => 'Ð',
12001
            '%D1' => 'Ñ',
12002
            '%D2' => 'Ò',
12003
            '%D3' => 'Ó',
12004
            '%D4' => 'Ô',
12005
            '%D5' => 'Õ',
12006
            '%D6' => 'Ö',
12007
            '%D7' => '×',
12008
            '%D8' => 'Ø',
12009
            '%D9' => 'Ù',
12010
            '%DA' => 'Ú',
12011
            '%DB' => 'Û',
12012
            '%DC' => 'Ü',
12013
            '%DD' => 'Ý',
12014
            '%DE' => 'Þ',
12015
            '%DF' => 'ß',
12016
            '%E0' => 'à',
12017
            '%E1' => 'á',
12018
            '%E2' => 'â',
12019
            '%E3' => 'ã',
12020
            '%E4' => 'ä',
12021
            '%E5' => 'å',
12022
            '%E6' => 'æ',
12023
            '%E7' => 'ç',
12024
            '%E8' => 'è',
12025
            '%E9' => 'é',
12026
            '%EA' => 'ê',
12027
            '%EB' => 'ë',
12028
            '%EC' => 'ì',
12029
            '%ED' => 'í',
12030
            '%EE' => 'î',
12031
            '%EF' => 'ï',
12032
            '%F0' => 'ð',
12033
            '%F1' => 'ñ',
12034
            '%F2' => 'ò',
12035
            '%F3' => 'ó',
12036
            '%F4' => 'ô',
12037
            '%F5' => 'õ',
12038
            '%F6' => 'ö',
12039
            '%F7' => '÷',
12040
            '%F8' => 'ø',
12041
            '%F9' => 'ù',
12042
            '%FA' => 'ú',
12043
            '%FB' => 'û',
12044
            '%FC' => 'ü',
12045
            '%FD' => 'ý',
12046
            '%FE' => 'þ',
12047
            '%FF' => 'ÿ',
12048
        ];
12049
    }
12050
12051
    /**
     * Decodes a UTF-8 string to ISO-8859-1.
     *
     * The string is rewritten in place: a read index walks the UTF-8 bytes while a
     * write index (which never overtakes it) stores the decoded single bytes.
     * Two-byte sequences whose code point is below 256 are mapped through the CHR
     * lookup table; every other multi-byte sequence is replaced by "?".
     * A two-byte lead byte at the very end of the input (truncated sequence) is
     * replaced by "?" as well, instead of reading past the end of the string.
     *
     * @param string $str             <p>The input string.</p>
     * @param bool   $keep_utf8_chars <p>
     *                                If true, the untouched input is returned whenever the decoded result is
     *                                not shorter than the input, both measured with self::strlen().
     *                                </p>
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // save for later comparison
        $str_backup = $str;
        $len = \strlen($str);

        // the byte <-> integer lookup tables are loaded lazily
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $no_char_found = '?';
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            // the high nibble of the current byte tells how long the sequence is
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    // lead byte of a 2-byte sequence
                    if ($i + 1 >= $len) {
                        // truncated sequence: there is no continuation byte to read
                        $str[$j] = $no_char_found;

                        break;
                    }

                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $no_char_found;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // lead byte of a 4-byte sequence: skip one byte more than for 3-byte sequences
                    ++$i;

                // no break

                case "\xE0":
                    // 3-byte (or 4-byte) sequences never fit into ISO-8859-1
                    $str[$j] = $no_char_found;
                    $i += 2;

                    break;

                default:
                    // every other byte is copied unchanged
                    $str[$j] = $str[$i];
            }
        }

        /** @var false|string $return - needed for PhpStan (stubs error) */
        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keep_utf8_chars === true
            &&
            (int) self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
12121
12122
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * Thin wrapper around the native "utf8_encode()": an empty input is returned
     * as-is and a "false" result (possible with a polyfill) becomes an empty string.
     *
     * NOTE(review): "utf8_encode()" is deprecated as of PHP 8.2 - consider a
     * replacement before it is removed.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /** @var false|string $encoded - the polyfill maybe return false */
        $encoded = \utf8_encode($str);

        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        return $encoded === false ? '' : $encoded;
    }
12146
12147
    /**
     * Fixes "utf8-win1252" chars by delegating to "UTF8::fix_simple_utf8()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *                <p>The result of "UTF8::fix_simple_utf8()" for the given string.</p>
     *
     * @deprecated <p>please use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
12160
12161
    /**
     * Returns the class-level whitespace table.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @return string[]
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
     *                  as defined in above URL
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
12174
12175
    /**
     * Limit the number of words in a string.
     *
     * The leading part of the string that holds at most $limit whitespace separated
     * words is kept; if anything was cut off, trailing whitespace is trimmed and
     * $str_add_on is appended. A string that already fits is returned unchanged.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $limit      <p>The limit of words as integer, values below 1 yield an empty string.</p>
     * @param string $str_add_on <p>Replacement for the stripped part of the string.</p>
     *
     * @return string
     */
    public static function words_limit(
        string $str,
        int $limit = 100,
        string $str_add_on = '…'
    ): string {
        if ($limit < 1 || $str === '') {
            return '';
        }

        $pattern = '/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u';
        \preg_match($pattern, $str, $matches);

        // nothing matched at all, or the match already covers the whole string
        $head = $matches[0] ?? null;
        if ($head === null || \mb_strlen($str) === (int) \mb_strlen($head)) {
            return $str;
        }

        return \rtrim($head) . $str_add_on;
    }
12205
12206
    /**
     * Wraps a string to a given number of characters
     *
     * The input is turned into a list of elements (via self::str_split()) plus a
     * single-byte "mask" string with one byte per element: " " for a space, "#" for
     * an already existing break and "?" for anything else. The native "wordwrap()"
     * then runs on the mask and its "#" positions are mapped back onto the elements.
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @return string
     *                <p>The given string wrapped at the specified column, or an empty string
     *                if the input or the break string is empty.</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($break === '' || $str === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }

        // build the element list and its single-byte mask side by side
        $chars = [];
        $mask = '';
        foreach ($segments as $index => $segment) {
            if ($index) {
                // an existing break between two segments counts as one element
                $chars[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $chars[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $mask = \wordwrap($mask, $width, '#', $cut);
        $mask_length = \mb_strlen($mask);

        $wrapped = '';
        $char_pos = 0;
        $mask_pos = -1;
        $break_pos = -1;
        while (($break_pos = \mb_strpos($mask, '#', $break_pos + 1)) !== false) {
            // copy everything up to the next break marker
            for (++$mask_pos; $mask_pos < $break_pos; ++$mask_pos) {
                $wrapped .= $chars[$char_pos];
                unset($chars[$char_pos++]);

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($mask_pos > $mask_length) {
                    break 2;
                }
            }

            // the marker replaced a break or a space: drop that element,
            // otherwise the marker was inserted and no element is consumed
            if ($chars[$char_pos] === $break || $chars[$char_pos] === ' ') {
                unset($chars[$char_pos++]);
            }

            $wrapped .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($break_pos > $mask_length) {
                break;
            }
        }

        // whatever is left after the last break marker
        return $wrapped . \implode('', $chars);
    }
12292
12293
    /**
     * Line-wrap the string at $width, but split the string by "$delimiter" before ...
     *    ... so that every line is wrapped on its own via self::wordwrap().
     *
     * The wrapped lines are joined again with $delimiter, or with "\n" when no
     * delimiter was given.
     *
     * @param string      $str             <p>The input string.</p>
     * @param int         $width           [optional] <p>The column width.</p>
     * @param string      $break           [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool        $cut             [optional] <p>
     *                                     If the cut is set to true, the string is
     *                                     always wrapped at or before the specified width. So if you have
     *                                     a word that is larger than the given width, it is broken apart.
     *                                     </p>
     * @param bool        $add_final_break [optional] <p>
     *                                     If this flag is true, then the method will add a $break at the end
     *                                     of the result string.
     *                                     </p>
     * @param string|null $delimiter       [optional] <p>
     *                                     You can change the default behavior, where we split the string by newline
     *                                     ("\r\n", "\r" or "\n").
     *                                     </p>
     *
     * @return string
     */
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $add_final_break = true,
        string $delimiter = null
    ): string {
        $lines = $delimiter === null
            ? \preg_split('/\\r\\n|\\r|\\n/', $str)
            : \explode($delimiter, $str);

        $wrapped_lines = [];
        if ($lines !== false) {
            foreach ($lines as $line) {
                $wrapped_lines[] = self::wordwrap($line, $width, $break, $cut);
            }
        }

        $glue = $delimiter ?? "\n";
        $suffix = $add_final_break ? $break : '';

        return \implode($glue, $wrapped_lines) . $suffix;
    }
12344
12345
    /**
     * Returns the class-level list of Unicode White Space characters.
     *
     * @return string[] an array with numeric code point as key and White Space Character as value
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
12354
12355
    /**
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
     *
     * The check is a small state machine over the octets of the string: it rejects
     * illegal lead octets, missing continuation octets, non-shortest forms,
     * 5- and 6-octet sequences, surrogates and code points above 0x10FFFF.
     * A string that ends in the middle of a multi-octet sequence is rejected too.
     *
     * @see http://hsivonen.iki.fi/php-utf8/
     *
     * @param string $str    <p>The string to be checked.</p>
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     *              <p>True for an empty string and for valid UTF-8, false otherwise.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function is_utf8_string(string $str, bool $strict = false)
    {
        if ($str === '') {
            return true;
        }

        if ($strict === true) {
            // UTF-16 / UTF-32 are only probed for strings that self::is_binary() reports as binary
            $is_binary = self::is_binary($str, true);

            if ($is_binary && self::is_utf16($str, false) !== false) {
                return false;
            }

            if ($is_binary && self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        // NOTE(review): the "/u" shortcut is taken when pcre_utf8_support() is NOT true,
        // which reads inverted next to the comment below - confirm against pcre_utf8_support().
        if (self::pcre_utf8_support() !== true) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^./us', $str, $ar) === 1;
        }

        $mState = 0; // cached expected number of octets after the current octet
        // until the beginning of the next UTF8 character sequence
        $mUcs4 = 0; // cached Unicode character
        $mBytes = 1; // cached expected number of octets in the current sequence

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $len = \strlen($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $len; ++$i) {
            $in = self::$ORD[$str[$i]];

            if ($mState === 0) {
                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if ((0x80 & $in) === 0) {
                    // US-ASCII, pass straight through.
                    $mBytes = 1;
                } elseif ((0xE0 & $in) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $mUcs4 = ($in & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;
                } elseif ((0xF0 & $in) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $mUcs4 = ($in & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;
                } elseif ((0xF8 & $in) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $mUcs4 = ($in & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;
                } elseif ((0xFC & $in) === 0xF8) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = ($in & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;
                } elseif ((0xFE & $in) === 0xFC) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = ($in & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;
                } else {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }
            } elseif ((0xC0 & $in) === 0x80) {
                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence

                // Legal continuation.
                $shift = ($mState - 1) * 6;
                $mUcs4 |= ($in & 0x0000003F) << $shift;

                // End of the multi-octet sequence: mUcs4 now contains the final
                // Unicode code point.
                if (--$mState === 0) {
                    // Check for illegal sequences and code points.
                    //
                    // From Unicode 3.1, non-shortest form is illegal
                    if (
                        ($mBytes === 2 && $mUcs4 < 0x0080)
                        ||
                        ($mBytes === 3 && $mUcs4 < 0x0800)
                        ||
                        ($mBytes === 4 && $mUcs4 < 0x10000)
                        ||
                        ($mBytes > 4)
                        ||
                        // From Unicode 3.2, surrogate characters are illegal.
                        (($mUcs4 & 0xFFFFF800) === 0xD800)
                        ||
                        // Code points outside the Unicode range are illegal.
                        ($mUcs4 > 0x10FFFF)
                    ) {
                        return false;
                    }

                    // initialize UTF8 cache
                    $mState = 0;
                    $mUcs4 = 0;
                    $mBytes = 1;
                }
            } else {
                // ((0xC0 & (*in) != 0x80) && (mState != 0))
                // Incomplete multi-octet sequence.
                return false;
            }
        }

        // a non-zero state means the string ended while continuation octets were
        // still expected, i.e. the last sequence is truncated
        return $mState === 0;
    }
12502
12503
    /**
     * Replaces the entries of one case-folding list by the entries of the other one.
     *
     * First the "upper" / "lower" lists of self::$COMMON_CASE_FOLD are applied, then
     * (optionally) the two lists loaded from the "caseFolding_full" data file.
     *
     * @param string $str
     * @param bool   $use_lowercase      <p>
     *                                   Only a strict "true" replaces the "upper" entries by the "lower" ones,
     *                                   any other value replaces "lower" by "upper".
     *                                   </p>
     * @param bool   $use_full_case_fold <p>Convert not only common cases.</p>
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function fixStrCaseHelper(
        string $str,
        $use_lowercase = false,
        $use_full_case_fold = false
    ) {
        $to_lower = $use_lowercase === true;

        $common_upper = self::$COMMON_CASE_FOLD['upper'];
        $common_lower = self::$COMMON_CASE_FOLD['lower'];

        $str = $to_lower
            ? \str_replace($common_upper, $common_lower, $str)
            : \str_replace($common_lower, $common_upper, $str);

        if (!$use_full_case_fold) {
            return $str;
        }

        // the full table is loaded once and kept for later calls
        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        if ($to_lower) {
            return \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str);
        }

        return \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
    }
12549
12550
    /**
     * Includes "/data/<file>.php" (relative to this file) and returns what the include returns.
     *
     * @param string $file <p>The file name without directory and without the ".php" extension.</p>
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function getData(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
12566
12567
    /**
     * Fills the emoji caches on the first call.
     *
     * The emoji table is loaded if needed, sorted by key length (longest key first)
     * and split into a key cache and a value cache. In addition one placeholder
     * string, built from the crc32 checksum of the key, is appended to the
     * reversible key cache for every key.
     *
     * @return true|null
     *                   <p>True if the caches were built by this call, null if the key cache already existed.</p>
     */
    private static function initEmojiData()
    {
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        // longest keys first
        \uksort(
            self::$EMOJI,
            static function (string $a, string $b): int {
                return \strlen($b) <=> \strlen($a);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = \array_values(self::$EMOJI);

        foreach (self::$EMOJI_KEYS_CACHE as $emoji_key) {
            $checksum = (string) \crc32($emoji_key);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev($checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
12597
12598
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * This is the case if the constant "MB_OVERLOAD_STRING" exists and its bit is
     * set in the INI directive "mbstring.func_overload".
     *
     * @return bool
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function mbstring_overloaded()
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */

        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $func_overload = (int) @\ini_get('mbstring.func_overload');

        return ($func_overload & \MB_OVERLOAD_STRING) !== 0;
    }
12617
12618
    /**
     * Filters a list of strings and returns the remaining values as a re-indexed list.
     *
     * @param array    $strings             <p>The strings to filter.</p>
     * @param bool     $remove_empty_values <p>If true, values that are empty after "trim()" are dropped.</p>
     * @param int|null $remove_short_values <p>
     *                                      If not null, values whose "mb_strlen()" is less than or equal to
     *                                      this number are dropped.
     *                                      </p>
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function reduce_string_array(
        array $strings,
        bool $remove_empty_values,
        int $remove_short_values = null
    ) {
        // init
        $return = [];

        // iterate by value: the elements are only read, a reference is not needed
        foreach ($strings as $str) {
            if (
                $remove_short_values !== null
                &&
                \mb_strlen($str) <= $remove_short_values
            ) {
                continue;
            }

            if (
                $remove_empty_values === true
                &&
                \trim($str) === ''
            ) {
                continue;
            }

            $return[] = $str;
        }

        return $return;
    }
12657
12658
    /**
     * Builds a regex fragment from the elements of $s and caches the result.
     *
     * $s is split with self::str_split() and every element is handled as follows:
     * - "-" is put in front of the character class,
     * - an element of at most two bytes is added to the class via "preg_quote()",
     * - a longer element for which self::strlen() is 1 is added to the class as-is,
     * - everything else becomes an alternative of its own.
     *
     * The result is either "[...]" or, if there are alternatives, "(?:[...]|...)".
     *
     * @param string $s     <p>The characters the fragment is built from.</p>
     * @param string $class <p>Initial content of the character class, used without quoting.</p>
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function rxClass(string $s, string $class = '')
    {
        static $RX_CLASS_CACHE = [];

        $cache_key = $s . $class;

        if (isset($RX_CLASS_CACHE[$cache_key])) {
            return $RX_CLASS_CACHE[$cache_key];
        }

        $class_array = [$class];

        // iterate by value with a name of its own: the split result is a temporary,
        // so a reference has no target and would only overwrite the parameter
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                $class_array[0] = '-' . $class_array[0];
            } elseif (!isset($char[2])) {
                $class_array[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                $class_array[0] .= $char;
            } else {
                $class_array[] = $char;
            }
        }

        // compare against the empty string, a plain truthiness check would
        // leave a class that consists of "0" only without its brackets
        if ($class_array[0] !== '') {
            $class_array[0] = '[' . $class_array[0] . ']';
        }

        if (\count($class_array) === 1) {
            $return = $class_array[0];
        } else {
            $return = '(?:' . \implode('|', $class_array) . ')';
        }

        $RX_CLASS_CACHE[$cache_key] = $return;

        return $return;
    }
12708
12709
    /**
     * Upper-case the first character of every part of a personal name.
     *
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * $names is split by $delimiter and every part gets its first character
     * upper-cased, except for:
     * - parts that are exactly one of the special-case words (e.g. "van", "von", "de"),
     * - parts that start with one of the special-case prefixes (e.g. "mc", "d'"),
     * - and, only when $delimiter is "-", parts that start with one of the special-case words.
     *
     * @param string $names     <p>The name(s) to process.</p>
     * @param string $delimiter <p>The separator between the name parts; an empty delimiter yields an empty string.</p>
     * @param string $encoding  <p>The charset that is used for the prefix comparison.</p>
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function str_capitalize_name_helper(
        string $names,
        string $delimiter,
        string $encoding = 'UTF-8'
    ) {
        // explode() cannot work with an empty delimiter: it returned false (plus
        // a warning) before PHP 8 and throws a ValueError since then. Keep the
        // historic "empty string" result for both.
        if ($delimiter === '') {
            return '';
        }

        $name_helper_array = \explode($delimiter, $names);

        $special_cases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        // Beginnings that keep a name part untouched; the special-case words
        // only count as beginnings when the parts were split on "-".
        $protected_beginnings = $special_cases['prefixes'];
        if ($delimiter === '-') {
            $protected_beginnings = \array_merge($special_cases['names'], $protected_beginnings);
        }

        foreach ($name_helper_array as $index => $name) {
            if (\in_array($name, $special_cases['names'], true)) {
                continue;
            }

            foreach ($protected_beginnings as $beginning) {
                if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                    // one match is enough, go on with the next name part
                    continue 2;
                }
            }

            $name_helper_array[$index] = self::ucfirst($name);
        }

        return \implode($delimiter, $name_helper_array);
    }
12803
12804
    /**
     * Generic case-sensitive transformation for collation matching.
     *
     * The string is decomposed (Unicode normalization form D) and all
     * non-spacing marks (\p{Mn}) are removed afterwards, so that e.g. an
     * accented letter is reduced to its base letter.
     *
     * @param string $str <p>The input string</p>
     *
     * @return string|null
     *                     <p>The folded string, an empty string if the input could not be normalized,
     *                     or null if the regex replacement failed.</p>
     */
    private static function strtonatfold(string $str)
    {
        /** @noinspection PhpUndefinedClassInspection */
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        // normalize() does not return a string on failure (e.g. invalid UTF-8);
        // never hand that value to preg_replace(), which only accepts strings.
        if (!\is_string($decomposed)) {
            return '';
        }

        return \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
12820
12821
    /**
     * Convert one single-byte character into its UTF-8 byte sequence.
     *
     * The ordinal of $input is looked up in the "ord" table. If that ordinal
     * has an entry in the Windows-1252 special-case table, the mapped UTF-8
     * string is returned. Otherwise a two-byte sequence is assembled:
     * lead byte = chr(ordinal / 64) | 0xC0, continuation byte = ($input & 0x3F) | 0x80.
     *
     * The lookup tables are loaded lazily via getData() on first use.
     *
     * @param int|string $input <p>The character to convert; it is used as key of the "ord" table.</p>
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function to_utf8_convert_helper($input)
    {
        // init
        $buf = '';

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // Truncate explicitly: a fractional float used as array key is
            // deprecated since PHP 8.1 ("Implicit conversion from float to int
            // loses precision").
            /** @noinspection OffsetOperationsInspection */
            $cc1 = self::$CHR[(int) ($ordC1 / 64)] | "\xC0";
            // "&" and "|" are intentional here: with two string operands they
            // work byte-wise, which builds the continuation byte.
            $cc2 = ((string) $input & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
12857
12858
    /**
     * Rewrite "%uXXXX" escape sequences as hexadecimal HTML entities.
     *
     * Every "%u" that is followed by three or four hex digits becomes
     * "&#x<digits>;"; all other content is left untouched.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function urldecode_unicode_helper(string $str)
    {
        $unicode_escape_rx = '/%u([0-9a-fA-F]{3,4})/';

        // nothing to rewrite, hand the input back unchanged
        if (!\preg_match($unicode_escape_rx, $str)) {
            return $str;
        }

        return (string) \preg_replace($unicode_escape_rx, '&#x\\1;', $str);
    }
12874
}
12875