Passed
Push — master ( 9d2783...b1e825 )
by Lars
03:34
created

UTF8   F

Complexity

Total Complexity 1706

Size/Duplication

Total Lines 12640
Duplicated Lines 0 %

Test Coverage

Coverage 79.61%

Importance

Changes 91
Bugs 51 Features 6
Metric Value
eloc 4384
c 91
b 51
f 6
dl 0
loc 12640
ccs 3058
cts 3841
cp 0.7961
rs 0.8
wmc 1706

298 Methods

Rating   Name   Duplication   Size   Complexity  
A str_dasherize() 0 3 1
A add_bom_to_string() 0 7 2
A ctype_loaded() 0 3 1
A chr_to_int() 0 3 1
A decode_mimeheader() 0 15 5
A css_stripe_media_queries() 0 6 1
A __construct() 0 2 1
B between() 0 48 8
A char_at() 0 7 2
A chars() 0 3 1
A collapse_whitespace() 0 8 2
A access() 0 11 4
A callback() 0 3 1
A bom() 0 3 1
A encode_mimeheader() 0 25 5
A first_char() 0 11 4
A finfo_loaded() 0 3 1
A max() 0 14 3
A str_contains() 0 10 2
A get_unique_string() 0 15 2
A is_bom() 0 10 3
A is_hexadecimal() 0 8 2
A has_uppercase() 0 8 2
A remove_left() 0 21 4
A remove_html() 0 3 1
A html_escape() 0 6 1
A regex_replace() 0 20 3
A replace_all() 0 11 2
A is_alpha() 0 8 2
A is_serialized() 0 11 3
A is_uppercase() 0 8 2
A str_contains_all() 0 23 6
A normalize_line_ending() 0 3 1
A spaces_to_tabs() 0 11 3
A is_blank() 0 8 2
A replace() 0 11 2
A remove_right() 0 22 4
A remove_html_breaks() 0 3 1
A is_binary_file() 0 16 3
A html_stripe_empty_tags() 0 6 1
A json_loaded() 0 3 1
A hasBom() 0 3 1
A is_empty() 0 3 1
A is_alphanumeric() 0 8 2
A has_lowercase() 0 8 2
A isJson() 0 3 1
A chr_to_decimal() 0 30 6
A file_has_bom() 0 8 2
A str_begins() 0 3 1
B str_camelize() 0 70 10
A parse_str() 0 16 4
A filter_input() 0 13 2
A array_change_key_case() 0 20 5
A count_chars() 0 11 1
A max_chr_width() 0 8 2
A isBinary() 0 3 1
A ltrim() 0 19 4
A emoji_decode() 0 16 2
D is_utf8() 0 144 31
A lcword() 0 8 1
A mbstring_loaded() 0 3 1
D chr() 0 101 18
C normalize_encoding() 0 134 14
C get_file_type() 0 95 15
C is_utf16() 0 65 16
A isHtml() 0 3 1
C filter() 0 54 13
A normalize_whitespace() 0 30 6
A isBase64() 0 3 1
A is_html() 0 12 2
A html_decode() 0 3 1
A isUtf32() 0 3 1
A rtrim() 0 19 4
A chunk_split() 0 3 1
A removeBOM() 0 3 1
A emoji_encode() 0 16 2
B get_random_string() 0 53 10
A fix_utf8() 0 30 4
A isUtf8() 0 3 1
A clean() 0 46 6
A is_ascii() 0 7 2
B range() 0 41 10
B rawurldecode() 0 37 8
A normalize_msword() 0 43 2
D getCharDirection() 0 105 118
A htmlspecialchars() 0 11 3
A filter_var_array() 0 9 2
A decimal_to_chr() 0 3 1
A pcre_utf8_support() 0 4 1
A codepoints() 0 29 4
A lowerCaseFirst() 0 8 1
A chr_map() 0 5 1
A cleanup() 0 25 2
A showSupport() 0 8 2
A remove_invisible_characters() 0 19 3
A single_chr_html_encode() 0 15 4
B is_binary() 0 35 9
A intlChar_loaded() 0 3 1
A lcfirst() 0 44 5
A fits_inside() 0 3 1
A intl_loaded() 0 3 1
A chr_size_list() 0 17 3
A remove_bom() 0 21 5
F extract_text() 0 175 34
A isBom() 0 3 1
A int_to_chr() 0 3 1
A is_lowercase() 0 8 2
A str_capitalize_name() 0 8 1
A iconv_loaded() 0 3 1
A lcwords() 0 31 6
A isAscii() 0 3 1
A normalizeEncoding() 0 3 1
A filter_var() 0 9 2
B html_encode() 0 42 7
A isUtf16() 0 3 1
F encode() 0 139 37
C is_utf32() 0 65 16
C ord() 0 65 16
A json_decode() 0 14 2
A fix_simple_utf8() 0 19 4
A checkForSupport() 0 47 4
B is_json() 0 27 8
A int_to_hex() 0 7 2
A json_encode() 0 10 2
A is_base64() 0 16 5
A hex_to_int() 0 14 3
A htmlentities() 0 23 3
A hex_to_chr() 0 3 1
A filter_input_array() 0 9 2
A getSupportInfo() 0 13 3
A replace_diamond_question_mark() 0 35 5
A chr_to_hex() 0 11 3
A min() 0 14 3
C html_entity_decode() 0 86 17
A split() 0 6 1
A remove_duplicates() 0 14 4
B file_get_contents() 0 58 11
A binary_to_str() 0 12 3
A str_substr_after_first_separator() 0 28 6
B str_to_lines() 0 29 8
A substr_in_byte() 0 18 6
A strnatcasecmp() 0 5 1
A substr_left() 0 15 4
D strlen() 0 96 19
A str_isubstr_last() 0 20 4
A str_replace_beginning() 0 21 6
B stripos() 0 59 11
A str_offset_exists() 0 10 2
D strrchr() 0 101 20
A to_filename() 0 24 2
A str_iends_with() 0 11 3
C utf8_decode() 0 60 13
B str_longest_common_suffix() 0 51 10
C wordwrap() 0 52 12
B ucfirst() 0 57 7
A str_pad_both() 0 7 1
A str_index_last() 0 11 1
A str_substr_last() 0 33 6
A str_limit() 0 26 6
A toUTF8() 0 3 1
A string() 0 10 1
B rxClass() 0 39 8
A str_ensure_right() 0 13 4
B str_titleize_for_humans() 0 152 5
A str_starts_with() 0 11 3
A str_humanize() 0 15 1
C substr_count_in_byte() 0 54 15
A strchr() 0 8 1
A strichr() 0 8 1
A str_index_first() 0 11 1
A strlen_in_byte() 0 12 3
A str_ireplace_ending() 0 21 6
C str_longest_common_substring() 0 73 16
A titlecase() 0 24 5
A getData() 0 6 1
A str_iindex_first() 0 11 1
B strtolower() 0 54 10
B urldecode() 0 37 8
A str_isubstr_before_first_separator() 0 16 5
B strrev() 0 43 10
D substr_replace() 0 124 27
A strstr_in_byte() 0 12 4
A str_matches_pattern() 0 3 1
B str_titleize() 0 55 10
A ws() 0 3 1
A str_replace_first() 0 17 2
A toLatin1() 0 3 1
A str_pad_right() 0 7 1
B ucwords() 0 48 9
A to_boolean() 0 35 5
C stristr() 0 68 15
A strncasecmp() 0 10 1
B strwidth() 0 40 8
A str_iends() 0 3 1
A trim() 0 19 4
A str_upper_camelize() 0 8 1
A substr_compare() 0 33 6
C substr_count() 0 62 16
A strnatcmp() 0 9 2
D str_pad() 0 146 16
A urldecode_unicode_helper() 0 8 2
A str_ireplace() 0 18 3
A to_latin1() 0 3 1
A str_replace_ending() 0 21 6
A string_has_bom() 0 10 3
B strtr() 0 33 8
A str_isubstr_after_last_separator() 0 23 5
B strspn() 0 30 10
A strcasecmp() 0 5 1
A str_transliterate() 0 3 1
A str_ends() 0 3 1
B str_capitalize_name_helper() 0 80 10
A utf8_encode() 0 16 3
C str_detect_encoding() 0 111 13
A str_istarts_with() 0 11 3
A str_replace() 0 14 1
A substr_iright() 0 15 4
A to_iso8859() 0 16 4
A words_limit() 0 17 5
A strip_tags() 0 15 4
A str_isubstr_before_last_separator() 0 21 6
D str_truncate_safe() 0 78 18
A substr_right() 0 31 6
D str_split() 0 125 28
A str_ends_with_any() 0 13 4
A strrpos_in_byte() 0 12 4
F strrpos() 0 118 25
A str_replace_last() 0 19 2
A str_iindex_last() 0 11 1
A str_substr_before_last_separator() 0 31 6
B strtocasefold() 0 33 7
A tabs_to_spaces() 0 11 3
B str_truncate() 0 44 7
D strripos() 0 96 19
A strpos_in_byte() 0 12 4
A str_ends_with() 0 11 3
F to_ascii() 0 149 27
A reduce_string_array() 0 26 6
B str_longest_common_prefix() 0 48 8
A mbstring_overloaded() 0 11 2
A str_pad_left() 0 7 1
A str_substr_first() 0 33 6
A str_repeat() 0 5 1
A strpbrk() 0 11 4
A whitespace_table() 0 3 1
A substr_count_simple() 0 32 6
A str_iends_with_any() 0 13 4
A str_substr_after_last_separator() 0 28 6
A str_isubstr_after_first_separator() 0 23 5
B str_snakeize() 0 55 6
A str_sort() 0 15 3
D to_utf8() 0 117 35
A ucword() 0 3 1
A str_underscored() 0 3 1
A str_offset_get() 0 14 4
A strip_whitespace() 0 7 2
A getDataIfExists() 0 10 2
A toAscii() 0 3 1
A str_ibegins() 0 3 1
B str_limit_after_word() 0 53 11
A str_upper_first() 0 8 1
A swapCase() 0 17 4
A substr_ileft() 0 15 4
A str_ensure_left() 0 11 3
B urldecode_fix_win1252_chars() 0 227 1
A toIso8859() 0 3 1
A strtonatfold() 0 6 1
C strcspn() 0 51 12
A fixStrCaseHelper() 0 33 5
B str_split_pattern() 0 49 11
D strstr() 0 92 18
A str_isubstr_first() 0 25 4
A str_last_char() 0 13 4
A str_ireplace_beginning() 0 21 6
A str_substr_before_first_separator() 0 32 6
F substr() 0 143 32
A wordwrap_per_line() 0 15 3
A str_surround() 0 3 1
A strncmp() 0 19 4
A str_insert() 0 28 4
A utf8_fix_win1252_chars() 0 3 1
A to_utf8_convert_helper() 0 27 5
B str_delimit() 0 33 8
B strtoupper() 0 54 10
A str_starts_with_any() 0 17 5
B strrichr() 0 54 11
A str_istarts_with_any() 0 17 5
B str_contains_any() 0 29 8
A initEmojiData() 0 26 4
B str_slice() 0 33 10
F strpos() 0 131 27
A str_shuffle() 0 35 6
A strcmp() 0 9 2
A str_word_count() 0 23 5
A strripos_in_byte() 0 12 4
A str_to_binary() 0 5 1
A symfony_polyfill_used() 0 16 5
B str_to_words() 0 36 8

How to fix   Complexity   

Complex Class

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
final class UTF8
8
{
9
    // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
10
    // This regular expression is a work around for http://bugs.exim.org/1279
11
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
12
13
    /**
     * Bom => Byte-Length
     *
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
     *
     * Every BOM is listed as raw bytes and in an 'as "WINDOWS-1252"' variant. The variants
     * presumably cover BOM bytes that were read as WINDOWS-1252 and stored as UTF-8 text
     * (their byte lengths match that reading) -- TODO confirm against the callers.
     *
     * @var array
     */
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        // written as byte escapes (U+00EF U+00BB U+00BF in UTF-8) so the key cannot get lost in transit again
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        // NOTE(review): the two blanks may stand for NUL bytes that were lost -- confirm
        '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        // NOTE(review): the two blanks may stand for NUL bytes that were lost -- confirm
        'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
32
33
    /**
34
     * Numeric code point => UTF-8 Character
35
     *
36
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
37
     *
38
     * @var array
39
     */
40
    private static $WHITESPACE = [
41
        // NUL Byte
42
        0 => "\x0",
43
        // Tab
44
        9 => "\x9",
45
        // New Line
46
        10 => "\xa",
47
        // Vertical Tab
48
        11 => "\xb",
49
        // Carriage Return
50
        13 => "\xd",
51
        // Ordinary Space
52
        32 => "\x20",
53
        // NO-BREAK SPACE
54
        160 => "\xc2\xa0",
55
        // OGHAM SPACE MARK
56
        5760 => "\xe1\x9a\x80",
57
        // MONGOLIAN VOWEL SEPARATOR
58
        6158 => "\xe1\xa0\x8e",
59
        // EN QUAD
60
        8192 => "\xe2\x80\x80",
61
        // EM QUAD
62
        8193 => "\xe2\x80\x81",
63
        // EN SPACE
64
        8194 => "\xe2\x80\x82",
65
        // EM SPACE
66
        8195 => "\xe2\x80\x83",
67
        // THREE-PER-EM SPACE
68
        8196 => "\xe2\x80\x84",
69
        // FOUR-PER-EM SPACE
70
        8197 => "\xe2\x80\x85",
71
        // SIX-PER-EM SPACE
72
        8198 => "\xe2\x80\x86",
73
        // FIGURE SPACE
74
        8199 => "\xe2\x80\x87",
75
        // PUNCTUATION SPACE
76
        8200 => "\xe2\x80\x88",
77
        // THIN SPACE
78
        8201 => "\xe2\x80\x89",
79
        //HAIR SPACE
80
        8202 => "\xe2\x80\x8a",
81
        // LINE SEPARATOR
82
        8232 => "\xe2\x80\xa8",
83
        // PARAGRAPH SEPARATOR
84
        8233 => "\xe2\x80\xa9",
85
        // NARROW NO-BREAK SPACE
86
        8239 => "\xe2\x80\xaf",
87
        // MEDIUM MATHEMATICAL SPACE
88
        8287 => "\xe2\x81\x9f",
89
        // IDEOGRAPHIC SPACE
90
        12288 => "\xe3\x80\x80",
91
    ];
92
93
    /**
94
     * @var array
95
     */
96
    private static $WHITESPACE_TABLE = [
97
        'SPACE'                     => "\x20",
98
        'NO-BREAK SPACE'            => "\xc2\xa0",
99
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
100
        'EN QUAD'                   => "\xe2\x80\x80",
101
        'EM QUAD'                   => "\xe2\x80\x81",
102
        'EN SPACE'                  => "\xe2\x80\x82",
103
        'EM SPACE'                  => "\xe2\x80\x83",
104
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
105
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
106
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
107
        'FIGURE SPACE'              => "\xe2\x80\x87",
108
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
109
        'THIN SPACE'                => "\xe2\x80\x89",
110
        'HAIR SPACE'                => "\xe2\x80\x8a",
111
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
112
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
113
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
114
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
115
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
116
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
117
    ];
118
119
    /**
120
     * bidirectional text chars
121
     *
122
     * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
123
     *
124
     * @var array
125
     */
126
    private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
127
        // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
128
        8234 => "\xE2\x80\xAA",
129
        // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
130
        8235 => "\xE2\x80\xAB",
131
        // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
132
        8236 => "\xE2\x80\xAC",
133
        // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
134
        8237 => "\xE2\x80\xAD",
135
        // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
136
        8238 => "\xE2\x80\xAE",
137
        // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
138
        8294 => "\xE2\x81\xA6",
139
        // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
140
        8295 => "\xE2\x81\xA7",
141
        // FIRST STRONG ISOLATE // (use -> dir = "auto")
142
        8296 => "\xE2\x81\xA8",
143
        // POP DIRECTIONAL ISOLATE
144
        8297 => "\xE2\x81\xA9",
145
    ];
146
147
    /**
148
     * @var array
149
     */
150
    private static $COMMON_CASE_FOLD = [
151
        'upper' => [
152
            'µ',
153
            'ſ',
154
            "\xCD\x85",
155
            'ς',
156
            'ẞ',
157
            "\xCF\x90",
158
            "\xCF\x91",
159
            "\xCF\x95",
160
            "\xCF\x96",
161
            "\xCF\xB0",
162
            "\xCF\xB1",
163
            "\xCF\xB5",
164
            "\xE1\xBA\x9B",
165
            "\xE1\xBE\xBE",
166
        ],
167
        'lower' => [
168
            'μ',
169
            's',
170
            'ι',
171
            'σ',
172
            'ß',
173
            'β',
174
            'θ',
175
            'φ',
176
            'π',
177
            'κ',
178
            'ρ',
179
            'ε',
180
            "\xE1\xB9\xA1",
181
            'ι',
182
        ],
183
    ];
184
185
    /**
186
     * @var array
187
     */
188
    private static $SUPPORT = [];
189
190
    /**
191
     * @var array|null
192
     */
193
    private static $BROKEN_UTF8_FIX;
194
195
    /**
196
     * @var array|null
197
     */
198
    private static $WIN1252_TO_UTF8;
199
200
    /**
201
     * @var array|null
202
     */
203
    private static $INTL_TRANSLITERATOR_LIST;
204
205
    /**
206
     * @var array|null
207
     */
208
    private static $ENCODINGS;
209
210
    /**
211
     * @var array|null
212
     */
213
    private static $ORD;
214
215
    /**
216
     * @var array|null
217
     */
218
    private static $EMOJI;
219
220
    /**
221
     * @var array|null
222
     */
223
    private static $EMOJI_VALUES_CACHE;
224
225
    /**
226
     * @var array|null
227
     */
228
    private static $EMOJI_KEYS_CACHE;
229
230
    /**
231
     * @var array|null
232
     */
233
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
234
235
    /**
236
     * @var array|null
237
     */
238
    private static $CHR;
239
240
    /**
     * Intentionally empty constructor.
     *
     * INFO: no state is initialised here; the methods visible in this class are static.
     */
    public function __construct()
    {
        // nothing to set up
    }
246
247
    /**
     * Fetches one character by its position: like $str[1], but counted in characters.
     *
     * INFO: an empty string is returned for an empty input or a negative position.
     *
     * @param string $str      <p>A UTF-8 string.</p>
     * @param int    $pos      <p>The position of character to return.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string single multi-byte character
     */
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        $hasTarget = $str !== '' && $pos >= 0;
        if (!$hasTarget) {
            return '';
        }

        // plain "mb_substr" for UTF-8, the generic helper of this class for everything else
        return $encoding === 'UTF-8'
            ? (string) \mb_substr($str, $pos, 1)
            : (string) self::substr($str, $pos, 1, $encoding);
    }
268
269
    /**
     * Makes sure the string starts with a BOM by prepending the UTF-8 BOM when none is detected.
     *
     * INFO: If a BOM is already detected, the input string is returned unchanged.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the output string that contains BOM
     */
    public static function add_bom_to_string(string $str): string
    {
        $alreadyPrefixed = self::string_has_bom($str) !== false;

        return $alreadyPrefixed ? $str : self::bom() . $str;
    }
286
287
    /**
     * Changes the case of all keys in an array.
     *
     * INFO: keys that become identical after the conversion overwrite each other, the last one wins.
     *
     * @param array  $array    <p>The array to work on</p>
     * @param int    $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
     *                         or <strong>CASE_LOWER</strong> (default), every other value is
     *                         handled like <strong>CASE_LOWER</strong></p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return array the values of the input array, indexed by the lower or uppercased keys
     */
    public static function array_change_key_case(array $array, int $case = \CASE_LOWER, string $encoding = 'UTF-8'): array
    {
        // decided once: anything that is not CASE_UPPER falls back to CASE_LOWER
        $toUpper = $case === \CASE_UPPER;

        $return = [];
        // iterate by value: nothing is written back into $array, so a reference
        // would only force a copy of the array and leave a dangling reference behind
        foreach ($array as $key => $value) {
            $key = $toUpper
                ? self::strtoupper((string) $key, $encoding)
                : self::strtolower((string) $key, $encoding);

            $return[$key] = $value;
        }

        return $return;
    }
318
319
    /**
     * Extracts the text enclosed by the first $start delimiter (searched from $offset on)
     * and the next $end delimiter after it.
     *
     * INFO: an empty string is returned if one of the delimiters is not found
     * or if nothing stands between them.
     *
     * @param string $str
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            // generic path: go through the encoding-aware helpers of this class
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $startAt = self::strpos($str, $start, $offset, $encoding);
            if ($startAt === false) {
                return '';
            }

            $from = $startAt + (int) self::strlen($start, $encoding);
            $endAt = self::strpos($str, $end, $from, $encoding);
            if ($endAt === false || $endAt === $from) {
                return '';
            }

            return (string) self::substr($str, $from, $endAt - $from, $encoding);
        }

        // UTF-8 path: call the "mb_" functions directly
        $startAt = \mb_strpos($str, $start, $offset);
        if ($startAt === false) {
            return '';
        }

        $from = $startAt + (int) \mb_strlen($start);
        $endAt = \mb_strpos($str, $end, $from);
        if ($endAt === false || $endAt === $from) {
            return '';
        }

        return (string) \mb_substr($str, $from, $endAt - $from);
    }
382
383
    /**
     * Converts a string of binary digits ("0" / "1") into the bytes it describes.
     *
     * The digits are read as one big-endian number: insignificant leading zeros are dropped
     * and the rest is left-padded to whole bytes, e.g. "1010" => "\x0a".
     * Characters other than "0" and "1" are ignored.
     *
     * INFO: an empty string is returned for non-string input, for an empty string
     * and for input that contains no "1" digit.
     *
     * @param mixed $bin 1|0
     *
     * @return string
     */
    public static function binary_to_str($bin): string
    {
        if (!\is_string($bin) || $bin === '') {
            return '';
        }

        // keep the binary digits only and strip the insignificant leading zeros
        $bits = \ltrim((string) \preg_replace('/[^01]/', '', $bin), '0');
        if ($bits === '') {
            return '';
        }

        // pad on the LEFT to a multiple of 8, so the low bits stay in the last byte
        $missing = (8 - \strlen($bits) % 8) % 8;
        $bits = \str_repeat('0', $missing) . $bits;

        // convert byte by byte: no float precision limit, regardless of the input length
        $str = '';
        foreach (\str_split($bits, 8) as $byte) {
            $str .= \chr((int) \bindec($byte));
        }

        return $str;
    }
403
404
    /**
     * Returns the UTF-8 Byte Order Mark (the three bytes EF BB BF).
     *
     * INFO: the private UTF8::$BOM table also lists the UTF-16 and UTF-32 BOM values
     *
     * @return string UTF-8 Byte Order Mark
     */
    public static function bom(): string
    {
        $utf8Bom = "\xef\xbb\xbf";

        return $utf8Bom;
    }
415
416
    /**
     * @alias of UTF8::chr_map()
     *
     * Hands both arguments over to UTF8::chr_map() and returns its result unchanged.
     *
     * @param array|string $callback
     * @param string       $str
     *
     * @return string[]
     *
     * @see UTF8::chr_map()
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
430
431
    /**
     * Picks the single character located at $index (the first character has index 0).
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>Position of the character.</p>
     * @param string $encoding [optional] <p>Default is UTF-8</p>
     *
     * @return string the character at $index
     */
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        if ($encoding !== 'UTF-8') {
            // other encodings go through the encoding-aware helper of this class
            return (string) self::substr($str, $index, 1, $encoding);
        }

        return (string) \mb_substr($str, $index, 1);
    }
448
449
    /**
     * Splits the string into its characters, delegating to UTF8::str_split().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string[] an array of chars
     */
    public static function chars(string $str): array
    {
        $chars = self::str_split($str);

        return $chars;
    }
460
461
    /**
     * Detects once which extensions / features are available and stores the flags in self::$SUPPORT.
     *
     * INFO: if "mbstring" is loaded or the symfony polyfill is in use,
     * the internal encoding is switched to UTF-8 as well.
     *
     * @return true|null true if the detection ran in this call, null if it was already done before
     *
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
     */
    public static function checkForSupport()
    {
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            // the detection already ran, nothing to do
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        // the polyfill provides the "mb_" functions, so the internal encoding is set here as well
        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
516
517
    /**
     * Generates a character from the given code point (UTF-8 encoded by default).
     *
     * INFO: opposite to UTF8::ord()
     *
     * Results are kept in a static cache per code point and encoding. Code points up to 127
     * are read from the "chr" data table; bigger ones are created via "IntlChar" if it is
     * flagged as available, otherwise the UTF-8 bytes are assembled from the same data table.
     *
     * @param int|string $code_point <p>The code point for which to generate a character.</p>
     * @param string     $encoding   [optional] <p>Default is UTF-8</p>
     *
     * @return string|null multi-byte character, returns null on failure or empty input
     */
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // "<code point><encoding>" => already generated character
        static $MEMO = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // the support flag is only looked at for encodings besides these three
        $isOtherEncoding = $encoding !== 'UTF-8'
                           && $encoding !== 'ISO-8859-1'
                           && $encoding !== 'WINDOWS-1252';
        if ($isOtherEncoding && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        $memoKey = $code_point . $encoding;
        if (isset($MEMO[$memoKey])) {
            return $MEMO[$memoKey];
        }

        if ($code_point <= 127) { // use "simple"-char only until "\x80"
            if (self::$CHR === null) {
                self::$CHR = (array) self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $char = self::$CHR[$code_point];

            if ($encoding !== 'UTF-8') {
                $char = self::encode($encoding, $char);
            }

            return $MEMO[$memoKey] = $char;
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $char = \IntlChar::chr($code_point);

            if ($encoding !== 'UTF-8') {
                $char = self::encode($encoding, $char);
            }

            return $MEMO[$memoKey] = $char;
        }

        //
        // fallback via vanilla php
        //

        if (self::$CHR === null) {
            self::$CHR = (array) self::getData('chr');
        }

        $table = self::$CHR;
        $cp = (int) $code_point;

        // continuation byte (10xxxxxx) built from the lowest six bits of the given value
        $tail = static function (int $bits) use ($table) {
            return $table[($bits & 0x3F) + 0x80];
        };

        if ($cp <= 0x7F) {
            // one byte
            $char = $table[$cp];
        } elseif ($cp <= 0x7FF) {
            // two bytes
            $char = $table[($cp >> 6) + 0xC0] .
                    $tail($cp);
        } elseif ($cp <= 0xFFFF) {
            // three bytes
            $char = $table[($cp >> 12) + 0xE0] .
                    $tail($cp >> 6) .
                    $tail($cp);
        } else {
            // four bytes
            $char = $table[($cp >> 18) + 0xF0] .
                    $tail($cp >> 12) .
                    $tail($cp >> 6) .
                    $tail($cp);
        }

        if ($encoding !== 'UTF-8') {
            $char = self::encode($encoding, $char);
        }

        return $MEMO[$memoKey] = $char;
    }
629
630
    /**
     * Runs the callback on every character of the string and collects the results.
     *
     * INFO: the string is split via UTF8::str_split()
     *
     * @param array|string $callback <p>The callback function.</p>
     * @param string       $str      <p>UTF-8 string to run callback on.</p>
     *
     * @return string[] the outcome of callback
     */
    public static function chr_map($callback, string $str): array
    {
        $chars = self::str_split($str);

        return \array_map($callback, $chars);
    }
645
646
    /**
     * Builds a list with the byte length of every character of a Unicode string.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * INFO: an empty array is returned for an empty string
     *
     * @param string $str <p>The original unicode string.</p>
     *
     * @return int[] an array of byte lengths of each character
     */
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        // choose the byte counter first, the split + mapping is the same for both cases
        $byteLength = self::$SUPPORT['mbstring_func_overload'] === true
            ? static function (string $data): int {
                // "mb_" is available if overload is used, so use it ...
                return \mb_strlen($data, 'CP850'); // 8-BIT
            }
            : '\strlen';

        return \array_map($byteLength, self::str_split($str));
    }
676
677
    /**
     * Decodes the first UTF-8 sequence of the input into its decimal code point.
     *
     * The first byte decides how many bytes are read (1 to 4); a first byte that matches
     * none of the multi-byte patterns is returned as it is.
     *
     * @param string $char <p>The input character.</p>
     *
     * @return int
     */
    public static function chr_to_decimal(string $char): int
    {
        $value = self::ord($char[0]);

        // 0xxxxxxx
        if (($value & 0x80) === 0) {
            return $value;
        }

        // length of the sequence, taken from the marker bits of the first byte
        $length = 1;
        if (($value & 0xe0) === 0xc0) {
            // 110xxxxx
            $length = 2;
            $value &= ~0xc0;
        } elseif (($value & 0xf0) === 0xe0) {
            // 1110xxxx
            $length = 3;
            $value &= ~0xe0;
        } elseif (($value & 0xf8) === 0xf0) {
            // 11110xxx
            $length = 4;
            $value &= ~0xf0;
        }

        // 10xxxxxx: every following byte adds six more bits
        for ($offset = 1; $offset < $length; ++$offset) {
            $value = ($value << 6) + (self::ord($char[$offset]) & ~0x80);
        }

        return $value;
    }
715
716
    /**
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * @param int|string $char <p>The input character</p>
     * @param string     $pfix [optional] <p>Prefix that is handed to self::int_to_hex(), "U+" by default.</p>
     *
     * @return string The code point encoded as U+xxxx, or an empty string for an empty input
     */
    public static function chr_to_hex($char, string $pfix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        // the "&#0;" entity is looked up through the empty string
        $lookup = $char === '&#0;' ? '' : (string) $char;

        $code_point = self::ord($lookup);

        return self::int_to_hex($code_point, $pfix);
    }
736
737
    /**
     * Alias for "UTF8::chr_to_decimal()".
     *
     * @param string $chr <p>The input character.</p>
     *
     * @return int whatever UTF8::chr_to_decimal() returns for $chr
     *
     * @see UTF8::chr_to_decimal()
     */
    public static function chr_to_int(string $chr): int
    {
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
750
751
    /**
     * Splits a string into smaller chunks and joins them with the specified line ending.
     *
     * INFO: the chunks are glued together with "implode()", so $end is placed
     *       between two chunks and not appended after the last one.
     *
     * @param string $body     <p>The original string to be split.</p>
     * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
     *
     * @return string the chunked string
     */
    public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunklen);

        return \implode($end, $chunks);
    }
764
765
    /**
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
     *
     * The optional steps run in a fixed order: diamond question mark,
     * invisible characters, whitespace, MS Word chars, BOM.
     *
     * @param string $str                           <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
     * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
     *                                              whitespace.</p>
     * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
     *                                              e.g.: "…"
     *                                              => "..."</p>
     * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
     *                                              combination with
     *                                              $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
     *                                              mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove
     *                                              invisible characters e.g.: "\0"</p>
     *
     * @return string clean UTF-8 encoded string
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings, hence the bounded
        // "{1,100}" repetition below.
        //
        // Group 1 captures runs of well-formed sequences and is written back,
        // the two single-byte alternatives match stray bytes which are dropped.

        $utf8_sequences = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        $cleaned = (string) \preg_replace($utf8_sequences, '$1', $str);

        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned, '');
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        if ($remove_bom) {
            $cleaned = self::remove_bom($cleaned);
        }

        return $cleaned;
    }
832
833
    /**
     * Cleans up a string: fixes simple UTF-8 encoding errors first and then
     * runs it through UTF8::clean() with a fixed set of options.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the cleaned string, empty string for an empty input
     */
    public static function cleanup($str): string
    {
        // init
        $input = (string) $str;

        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $repaired = self::fix_simple_utf8($input);

        // options for "UTF8::clean()", in parameter order
        $remove_bom = true;
        $normalize_whitespace = true;
        $normalize_msword = false;
        $keep_non_breaking_space = true;
        $replace_diamond_question_mark = true;
        $remove_invisible_characters = true;

        // remove all none UTF-8 symbols
        // && remove diamond question mark (�)
        // && remove invisible characters (e.g. "\0")
        // && remove BOM
        // && normalize whitespace chars (but keep non-breaking-spaces)
        return self::clean(
            $repaired,
            $remove_bom,
            $normalize_whitespace,
            $normalize_msword,
            $keep_non_breaking_space,
            $replace_diamond_question_mark,
            $remove_invisible_characters
        );
    }
867
868
    /**
     * Accepts a string or an array of strings and returns an array of Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * @param string|string[] $arg     <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $u_style <p>If True, will return code points in U+xxxx format,
     *                                 default, code points will be returned as integers.</p>
     *
     * @return array<int|string>
     *                           The array of code points:<br>
     *                           array<int> for $u_style === false<br>
     *                           array<string> for $u_style === true<br>
     */
    public static function codepoints($arg, bool $u_style = false): array
    {
        // a plain string is split up first, an array is used as it is
        $chars = \is_string($arg) === true ? self::str_split($arg) : $arg;

        $code_points = \array_map([self::class, 'ord'], $chars);

        if ($code_points === []) {
            return [];
        }

        if (!$u_style) {
            return $code_points;
        }

        // convert every integer code point via UTF8::int_to_hex()
        return \array_map([self::class, 'int_to_hex'], $code_points);
    }
912
913
    /**
     * Trims the string and replaces consecutive whitespace characters with a
     * single space. This includes tabs and newline characters, as well as
     * multibyte whitespace such as the thin space and ideographic space.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string with a trimmed $str and condensed whitespace
     */
    public static function collapse_whitespace(string $str): string
    {
        // prefer the native "mb_" regex engine, otherwise fall back to UTF8::regex_replace()
        /** @noinspection PhpComposerExtensionStubsInspection */
        $condensed = self::$SUPPORT['mbstring'] === true
            ? (string) \mb_ereg_replace('[[:space:]]+', ' ', $str)
            : self::regex_replace($str, '[[:space:]]+', ' ');

        return \trim($condensed);
    }
931
932
    /**
     * Returns count of characters used in a string.
     *
     * @param string $str                <p>The input string.</p>
     * @param bool   $cleanUtf8          [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool   $tryToUseMbFunction [optional] <p>Passed through to UTF8::str_split(); presumably set it to
     *                                   false, if you don't want to use the "mb_" functions there
     *                                   (TODO: confirm against UTF8::str_split()).</p>
     *
     * @return int[] an associative array of Character as keys and
     *               their count as values
     */
    public static function count_chars(
        string $str,
        bool $cleanUtf8 = false,
        bool $tryToUseMbFunction = true
    ): array {
        // one array element per character (chunk length 1) ...
        $characters = self::str_split($str, 1, $cleanUtf8, $tryToUseMbFunction);

        // ... and count how often every element occurs
        return \array_count_values($characters);
    }
956
957
    /**
     * Remove css media-queries.
     *
     * Every "@media ... { ... }" block that is matched by the pattern is replaced
     * with an empty string, the rest of the input is left untouched.
     *
     * @param string $str <p>The css input.</p>
     *
     * @return string the input without the matched media-query blocks
     */
    public static function css_stripe_media_queries(string $str): string
    {
        $media_query_pattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        $stripped = \preg_replace($media_query_pattern, '', $str);

        return (string) $stripped;
    }
972
973
    /**
     * Checks whether ctype is available on the server.
     *
     * @return bool
     *              <strong>true</strong> if the "ctype" extension is loaded, <strong>false</strong> otherwise
     */
    public static function ctype_loaded(): bool
    {
        $is_loaded = \extension_loaded('ctype');

        return $is_loaded;
    }
983
984
    /**
     * Converts an int-value into a UTF-8 character.
     *
     * The value is wrapped into a numeric html-entity ("&#...;") which is
     * then decoded via UTF8::html_entity_decode().
     *
     * @param mixed $int <p>The decimal code of the character.</p>
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        $numeric_entity = '&#' . $int . ';';

        return self::html_entity_decode($numeric_entity, \ENT_QUOTES | \ENT_HTML5);
    }
995
996
    /**
     * Decodes a MIME header field
     *
     * With iconv support the header is decoded by "iconv_mime_decode()" directly
     * into $encoding. Without it, "mb_decode_mimeheader()" decodes the header and
     * the decoded text is converted to $encoding afterwards.
     *
     * @param string $str      <p>The encoded MIME header field.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      A decoded MIME field on success,
     *                      or false if an error occurs during the decoding
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (self::$SUPPORT['iconv'] === true) {
            return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
        }

        // Decode first: the encoded-words ("=?charset?B?...?=") have to reach
        // the decoder unchanged. Converting the raw header beforehand has no
        // effect on the decoded text, so the requested charset would be ignored.
        $decoded = \mb_decode_mimeheader($str);

        // "mb_decode_mimeheader()" returns the text in the mbstring internal
        // encoding, so convert the result (not the input) to the target charset.
        if ($encoding !== 'UTF-8' && \is_string($decoded)) {
            $decoded = self::encode($encoding, $decoded);
        }

        return $decoded;
    }
1022
1023
    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * @param string $str                        <p>The input string.</p>
     * @param bool   $useReversibleStringMapping [optional] <p>
     *                                           When <b>TRUE</b>, we use a reversible string mapping
     *                                           between "emoji_encode" and "emoji_decode".</p>
     *
     * @return string
     */
    public static function emoji_decode(string $str, bool $useReversibleStringMapping = false): string
    {
        // make sure the emoji caches are filled before they are read
        self::initEmojiData();

        // pick the key-set that was used while encoding
        $encoded_keys = $useReversibleStringMapping
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        // keys => values
        return (string) \str_replace(
            (array) $encoded_keys,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }
1051
1052
    /**
     * Encode a string with emoji chars into a non-emoji string.
     *
     * @param string $str                        <p>The input string</p>
     * @param bool   $useReversibleStringMapping [optional] <p>
     *                                           When <b>TRUE</b>, we use a reversible string mapping
     *                                           between "emoji_encode" and "emoji_decode"</p>
     *
     * @return string
     */
    public static function emoji_encode(string $str, bool $useReversibleStringMapping = false): string
    {
        // make sure the emoji caches are filled before they are read
        self::initEmojiData();

        // pick the key-set that replaces the cached values
        $replacement_keys = $useReversibleStringMapping
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        // values => keys
        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            (array) $replacement_keys,
            $str
        );
    }
1080
1081
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double
     *        encoding, so you can call this function also on a UTF-8 string and you don't mess up the string.
     *
     * Besides real charsets, the pseudo encodings "JSON", "BASE64" and "HTML-ENTITIES" are handled
     * for both directions.
     *
     * @param string $toEncoding             <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                    <p>The input string</p>
     * @param bool   $autodetectFromEncoding [optional] <p>When true, the current string-encoding is always
     *                                       auto-detected and a detected encoding replaces $fromEncoding;
     *                                       if nothing can be detected the string is passed to
     *                                       UTF8::to_utf8().<br> When false, the detection only runs if
     *                                       $fromEncoding is empty.</p>
     * @param string $fromEncoding           [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                       A empty string will trigger the autodetect anyway.</p>
     *
     * @throws \InvalidArgumentException if $toEncoding is "JSON" and the string can not be json-encoded
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $toEncoding,
        string $str,
        bool $autodetectFromEncoding = true,
        string $fromEncoding = ''
    ): string {
        // nothing to convert or no target encoding
        if ($str === '' || $toEncoding === '') {
            return $str;
        }

        if ($toEncoding !== 'UTF-8' && $toEncoding !== 'CP850') {
            $toEncoding = self::normalize_encoding($toEncoding, 'UTF-8');
        }

        if ($fromEncoding && $fromEncoding !== 'UTF-8' && $fromEncoding !== 'CP850') {
            $fromEncoding = self::normalize_encoding($fromEncoding, null);
        }

        // source and target are already the same encoding
        if ($toEncoding && $fromEncoding && $fromEncoding === $toEncoding) {
            return $str;
        }

        // --- pseudo encoding: JSON

        if ($toEncoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }

        if ($fromEncoding === 'JSON') {
            $str = self::json_decode($str);
            $fromEncoding = '';
        }

        // --- pseudo encoding: BASE64

        if ($toEncoding === 'BASE64') {
            return \base64_encode($str);
        }

        if ($fromEncoding === 'BASE64') {
            // NOTE(review): strict mode returns false for invalid input, the result is used unchecked below
            $str = \base64_decode($str, true);
            $fromEncoding = '';
        }

        // --- pseudo encoding: HTML-ENTITIES

        if ($toEncoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true, 'UTF-8');
        }

        if ($fromEncoding === 'HTML-ENTITIES') {
            $str = self::html_decode($str, \ENT_COMPAT, 'UTF-8');
            $fromEncoding = '';
        }

        // --- detect the source encoding

        $detected = false;
        if ($autodetectFromEncoding || !$fromEncoding) {
            $detected = self::str_detect_encoding($str);
        }

        if ($detected !== false) {
            $fromEncoding = $detected;
        } elseif ($autodetectFromEncoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (!$fromEncoding || $fromEncoding === $toEncoding) {
            return $str;
        }

        // --- shortcuts for the most common conversions

        if (
            $toEncoding === 'UTF-8'
            &&
            \in_array($fromEncoding, ['WINDOWS-1252', 'ISO-8859-1'], true)
        ) {
            return self::to_utf8($str);
        }

        if (
            $toEncoding === 'ISO-8859-1'
            &&
            \in_array($fromEncoding, ['WINDOWS-1252', 'UTF-8'], true)
        ) {
            return self::to_iso8859($str);
        }

        // --- generic conversion

        if (
            self::$SUPPORT['mbstring'] === false
            &&
            !\in_array($toEncoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
        ) {
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // warning: do not use the symfony polyfill here
            $mb_converted = \mb_convert_encoding($str, $toEncoding, $fromEncoding);

            if ($mb_converted) {
                return $mb_converted;
            }
        }

        // last resort: iconv, otherwise hand back the (unconverted) string
        $iconv_converted = \iconv($fromEncoding, $toEncoding, $str);

        return $iconv_converted !== false ? $iconv_converted : $str;
    }
1239
1240
    /**
     * Encodes a string as MIME header field value via "iconv_mime_encode()",
     * using an empty field name.
     *
     * @param string $str              <p>The value that should be encoded.</p>
     * @param string $fromCharset      [optional] <p>Set the input charset.</p>
     * @param string $toCharset        [optional] <p>Set the output charset.</p>
     * @param string $transferEncoding [optional] <p>Set the transfer encoding.</p>
     * @param string $linefeed         [optional] <p>Set the used linefeed.</p>
     * @param int    $indent           [optional] <p>Set the max length indent.</p>
     *
     * @return false|string
     *                      An encoded MIME field on success,
     *                      or false if an error occurs during the encoding
     */
    public static function encode_mimeheader(
        $str,
        $fromCharset = 'UTF-8',
        $toCharset = 'UTF-8',
        $transferEncoding = 'Q',
        $linefeed = "\r\n",
        $indent = 76
    ) {
        // the two well-known charsets skip the normalization
        if ($fromCharset !== 'UTF-8' && $fromCharset !== 'CP850') {
            $fromCharset = self::normalize_encoding($fromCharset, 'UTF-8');
        }

        if ($toCharset !== 'UTF-8' && $toCharset !== 'CP850') {
            $toCharset = self::normalize_encoding($toCharset, 'UTF-8');
        }

        // preferences in the format that "iconv_mime_encode()" expects
        $preferences = [
            'scheme'           => $transferEncoding,
            'line-length'      => $indent,
            'input-charset'    => $fromCharset,
            'output-charset'   => $toCharset,
            'line-break-chars' => $linefeed,
        ];

        return \iconv_mime_encode('', $str, $preferences);
    }
1280
1281
    /**
     * Create an extract from a sentence; if the search-string is found, the extract is built around it.
     *
     * The cut positions are always searched at the next " " or "." of the text.
     *
     * @param string   $str                    <p>The input string.</p>
     * @param string   $search                 <p>The searched string.</p>
     * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
     * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacerForSkippedText = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // UTF-8 uses the native "mb_" functions, everything else the UTF8::* wrappers
        $is_utf8 = $encoding === 'UTF-8';

        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        // character count of a text
        $char_count = static function ($text) use ($is_utf8, $encoding): int {
            return (int) ($is_utf8 ? \mb_strlen($text) : self::strlen($text, $encoding));
        };

        // min() of the next " " and the next "." position in $str, searched from $offset
        $next_break = static function ($offset) use ($str, $is_utf8, $encoding): int {
            if ($is_utf8) {
                return (int) \min(
                    \mb_strpos($str, ' ', $offset),
                    \mb_strpos($str, '.', $offset)
                );
            }

            return (int) \min(
                self::strpos($str, ' ', $offset, $encoding),
                self::strpos($str, '.', $offset, $encoding)
            );
        };

        // part of $str
        $slice = static function ($start, $count) use ($str, $is_utf8, $encoding) {
            return $is_utf8
                ? \mb_substr($str, $start, $count)
                : self::substr($str, $start, $count, $encoding);
        };

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2, 0);
        }

        // --- without a search-string: cut the beginning of the text

        if ($search === '') {
            $scan_from = 0;
            if ($length > 0) {
                $scan_from = \min($length - 1, $char_count($str));
            }

            $cut_at = $next_break($scan_from);
            if (!$cut_at) {
                return $str;
            }

            $head = $slice(0, $cut_at);
            if ($head === false) {
                return '';
            }

            return \rtrim($head, $trim_chars) . $replacerForSkippedText;
        }

        // --- with a search-string: try to center the match

        $word_pos = (int) ($is_utf8 ? \mb_stripos($str, $search) : self::stripos($str, $search, 0, $encoding));
        $half_side = (int) ($word_pos - $length / 2 + $char_count($search) / 2);

        // start at the last " " or "." in front of the centered window
        $pos_start = 0;
        if ($half_side > 0) {
            $half_text = $slice(0, $half_side);

            if ($half_text !== false) {
                if ($is_utf8) {
                    $pos_start = (int) \max(
                        \mb_strrpos($half_text, ' '),
                        \mb_strrpos($half_text, '.')
                    );
                } else {
                    $pos_start = (int) \max(
                        self::strrpos($half_text, ' ', 0, $encoding),
                        self::strrpos($half_text, '.', 0, $encoding)
                    );
                }
            }
        }

        if (!$word_pos || $half_side <= 0) {
            // nothing to skip at the beginning: only the end is cut
            $offset = \min($length - 1, (int) self::strlen($str, $encoding));

            $pos_end = $next_break($offset);
            if (!$pos_end) {
                return $str;
            }

            $head = $slice(0, $pos_end);
            if ($head === false) {
                return '';
            }

            return \rtrim($head, $trim_chars) . $replacerForSkippedText;
        }

        $offset = \min($pos_start + $length - 1, (int) self::strlen($str, $encoding));

        $pos_end = $next_break($offset) - $pos_start;

        if ($pos_end <= 0) {
            // no usable end position: take everything from the start position
            $tail = $slice($pos_start, $char_count($str));
            if ($tail === false) {
                return '';
            }

            return $replacerForSkippedText . \ltrim($tail, $trim_chars);
        }

        // text is skipped on both sides
        $middle = $slice($pos_start, $pos_end);
        if ($middle === false) {
            return '';
        }

        return $replacerForSkippedText . \trim($middle, $trim_chars) . $replacerForSkippedText;
    }
1468
1469
    /**
1470
     * Reads entire file into a string.
1471
     *
1472
     * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
1473
     *
1474
     * @see http://php.net/manual/en/function.file-get-contents.php
1475
     *
1476
     * @param string        $filename         <p>
1477
     *                                        Name of the file to read.
1478
     *                                        </p>
1479
     * @param bool          $use_include_path [optional] <p>
1480
     *                                        Prior to PHP 5, this parameter is called
1481
     *                                        use_include_path and is a bool.
1482
     *                                        As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1483
     *                                        to trigger include path
1484
     *                                        search.
1485
     *                                        </p>
1486
     * @param resource|null $context          [optional] <p>
1487
     *                                        A valid context resource created with
1488
     *                                        stream_context_create. If you don't need to use a
1489
     *                                        custom context, you can skip this parameter by &null;.
1490
     *                                        </p>
1491
     * @param int|null      $offset           [optional] <p>
1492
     *                                        The offset where the reading starts.
1493
     *                                        </p>
1494
     * @param int|null      $maxLength        [optional] <p>
1495
     *                                        Maximum length of data read. The default is to read until end
1496
     *                                        of file is reached.
1497
     *                                        </p>
1498
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
1499
     * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
1500
     *                                        some files, because they used non default utf-8 chars. Binary files
1501
     *                                        like images or pdf will not be converted.</p>
1502
     * @param string        $fromEncoding     [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
1503
     *                                        An empty string will trigger the autodetect anyway.</p>
1504
     *
1505
     * @return false|string the function returns the read data as string or <b>false</b> on failure
1506
     */
1507 12
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $maxLength = null,
        int $timeout = 10,
        bool $convertToUtf8 = true,
        string $fromEncoding = ''
    ) {
        // Run the path through the string sanitizer; bail out if the filter fails.
        $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);
        if ($filename === false) {
            return false;
        }

        // No context given by the caller: create one that carries the timeout
        // in its "http" options.
        if ($context === null && $timeout) {
            $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
        }

        $offset = $offset ?? 0;

        // Only forward $maxLength to the native function when it was really set.
        $data = \is_int($maxLength) === true
            ? \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength)
            : \file_get_contents($filename, $use_include_path, $context, $offset);

        // return false on error
        if ($data === false) {
            return false;
        }

        if ($convertToUtf8 !== true) {
            return $data;
        }

        // Binary data that is neither UTF-16 nor UTF-32 is handed back untouched.
        $isPlainBinary = self::is_binary($data, true) === true
                         &&
                         self::is_utf16($data, false) === false
                         &&
                         self::is_utf32($data, false) === false;
        if ($isPlainBinary) {
            return $data;
        }

        return self::cleanup(
            self::encode('UTF-8', $data, false, $fromEncoding)
        );
    }
1566
1567
    /**
     * Tells whether the content of a file begins with a BOM (Byte Order Mark).
     *
     * The whole file is read and the check itself is delegated to "UTF8::string_has_bom()".
     *
     * @param string $file_path <p>Path of the file to inspect.</p>
     *
     * @throws \RuntimeException if the file could not be read (file_get_contents() returned false)
     *
     * @return bool
     *              <strong>true</strong> if the file starts with a BOM, <strong>false</strong> otherwise
     */
    public static function file_has_bom(string $file_path): bool
    {
        $content = \file_get_contents($file_path);

        if ($content !== false) {
            return self::string_has_bom($content);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1586
1587
    /**
     * Normalizes strings to the given Unicode normalization form (NFC by default).
     *
     * Arrays and objects are walked recursively; values that are neither string,
     * array nor object are returned unchanged. For strings, "\r" line endings are
     * normalized first, and non-ASCII content that is not yet in the requested form
     * is normalized. If the normalizer yields nothing usable, the string is passed
     * to "UTF8::encode('UTF-8', ...)" instead.
     *
     * @param mixed  $var                <p>The value to filter.</p>
     * @param int    $normalization_form <p>A form accepted by \Normalizer, e.g. \Normalizer::NFC.</p>
     * @param string $leading_combining  <p>Prefix that is put in front of a string starting with a
     *                                   combining mark (\p{Mn}); an empty string disables this.</p>
     *
     * @return mixed
     */
    public static function filter($var, int $normalization_form = \Normalizer::NFC, string $leading_combining = '◌')
    {
        $type = \gettype($var);

        // Containers: filter every element / property in place.
        if ($type === 'array' || $type === 'object') {
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            unset($item);

            return $var;
        }

        if ($type !== 'string') {
            return $var;
        }

        if (\strpos($var, "\r") !== false) {
            // Workaround https://bugs.php.net/65732
            $var = self::normalize_line_ending($var);
        }

        // Pure ASCII needs no normalization at all.
        if (self::is_ascii($var) !== false) {
            return $var;
        }

        if (\Normalizer::isNormalized($var, $normalization_form)) {
            // Non-empty marker, so that the isset() check below still passes.
            $normalized = '-';
        } else {
            $normalized = \Normalizer::normalize($var, $normalization_form);

            $var = isset($normalized[0])
                ? $normalized
                : self::encode('UTF-8', $var, true);
        }

        $startsWithCombiningMark = $var[0] >= "\x80"
                                   &&
                                   isset($normalized[0], $leading_combining[0])
                                   &&
                                   \preg_match('/^\p{Mn}/u', $var);

        if ($startsWithCombiningMark) {
            // Prevent leading combining chars
            // for NFC-safe concatenations.
            $var = $leading_combining . $var;
        }

        return $var;
    }
1651
1652
    /**
     * "filter_input()"-wrapper whose result is additionally passed through "UTF8::filter()".
     *
     * Gets a specific external variable by name and optionally filters it.
     *
     * @see http://php.net/manual/en/function.filter-input.php
     *
     * @param int    $type          <p>
     *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                              <b>INPUT_ENV</b>.
     *                              </p>
     * @param string $variable_name <p>
     *                              Name of the variable to get.
     *                              </p>
     * @param int    $filter        [optional] <p>
     *                              The ID of the filter to apply. The
     *                              manual page lists the available filters.
     *                              </p>
     * @param mixed  $options       [optional] <p>
     *                              Associative array of options or bitwise disjunction of flags. If the filter
     *                              accepts options, flags can be provided in the "flags" field of the array.
     *                              Only forwarded when a fourth argument is actually passed.
     *                              </p>
     *
     * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     */
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // Keep the native default for $options unless the caller supplied one.
        $value = \func_num_args() >= 4
            ? \filter_input($type, $variable_name, $filter, $options)
            : \filter_input($type, $variable_name, $filter);

        return self::filter($value);
    }
1694
1695
    /**
     * "filter_input_array()"-wrapper whose result is additionally passed through "UTF8::filter()".
     *
     * Gets external variables and optionally filters them.
     *
     * @see http://php.net/manual/en/function.filter-input-array.php
     *
     * @param int   $type       <p>
     *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                          <b>INPUT_ENV</b>.
     *                          </p>
     * @param mixed $definition [optional] <p>
     *                          An array defining the arguments. A valid key is a string
     *                          containing a variable name and a valid value is either a filter type, or an array
     *                          optionally specifying the filter, flags and options. If the value is an
     *                          array, valid keys are "filter" (the filter type), "flags" (flags that apply
     *                          to the filter) and "options" (options that apply to the filter).
     *                          </p>
     *                          <p>
     *                          This parameter can also be an integer holding a filter constant. Then all values
     *                          in the input array are filtered by this filter.
     *                          </p>
     * @param bool  $add_empty  [optional] <p>
     *                          Add missing keys as <b>NULL</b> to the return value.
     *                          </p>
     *
     * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
     *               is not set and <b>NULL</b> if the filter fails.
     */
    public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
    {
        // With only $type given, let the native function use its own defaults.
        $values = \func_num_args() >= 2
            ? \filter_input_array($type, $definition, $add_empty)
            : \filter_input_array($type);

        return self::filter($values);
    }
1740
1741
    /**
     * "filter_var()"-wrapper whose result is additionally passed through "UTF8::filter()".
     *
     * Filters a variable with a specified filter.
     *
     * @see http://php.net/manual/en/function.filter-var.php
     *
     * @param mixed $variable <p>
     *                        Value to filter.
     *                        </p>
     * @param int   $filter   [optional] <p>
     *                        The ID of the filter to apply. The
     *                        manual page lists the available filters.
     *                        </p>
     * @param mixed $options  [optional] <p>
     *                        Associative array of options or bitwise disjunction of flags. If the filter
     *                        accepts options, flags can be provided in the "flags" field of the array. For
     *                        the "callback" filter, a callable should be passed. The
     *                        callback must accept one argument, the value to be filtered, and return
     *                        the value after filtering/sanitizing it.
     *                        Only forwarded when a third argument is actually passed.
     *                        </p>
     *                        <p>
     *                        <code>
     *                        // filters that accept options
     *                        $options = array(
     *                        'options' => array(
     *                        'default' => 3, // value to return if the filter fails
     *                        'min_range' => 0
     *                        ),
     *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
     *                        );
     *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
     *                        // filters that only accept flags: pass them directly ...
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
     *                        // ... or as an array
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
     *                        array('flags' => FILTER_NULL_ON_FAILURE));
     *                        // callback filter ("foo" receives the value and returns the filtered value)
     *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
     *                        </code>
     *                        </p>
     *
     * @return mixed the filtered data, or <b>FALSE</b> if the filter fails
     */
    public static function filter_var($variable, int $filter = \FILTER_DEFAULT, $options = null)
    {
        // Keep the native default for $options unless the caller supplied one.
        $filtered = \func_num_args() >= 3
            ? \filter_var($variable, $filter, $options)
            : \filter_var($variable, $filter);

        return self::filter($filtered);
    }
1809
1810
    /**
     * "filter_var_array()"-wrapper whose result is additionally passed through "UTF8::filter()".
     *
     * Gets multiple variables and optionally filters them.
     *
     * @see http://php.net/manual/en/function.filter-var-array.php
     *
     * @param array $data       <p>
     *                          An array with string keys containing the data to filter.
     *                          </p>
     * @param mixed $definition [optional] <p>
     *                          An array defining the arguments. A valid key is a string
     *                          containing a variable name and a valid value is either a
     *                          filter type, or an array optionally specifying the filter, flags and options.
     *                          If the value is an array, valid keys are "filter" (the filter type),
     *                          "flags" (flags that apply to the filter) and "options" (options that
     *                          apply to the filter).
     *                          </p>
     *                          <p>
     *                          This parameter can also be an integer holding a filter constant. Then all values
     *                          in the input array are filtered by this filter.
     *                          </p>
     * @param bool  $add_empty  [optional] <p>
     *                          Add missing keys as <b>NULL</b> to the return value.
     *                          </p>
     *
     * @return mixed an array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set
     */
    public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
    {
        // With only $data given, let the native function use its own defaults.
        $values = \func_num_args() >= 2
            ? \filter_var_array($data, $definition, $add_empty)
            : \filter_var_array($data);

        return self::filter($values);
    }
1853
1854
    /**
     * Reports whether the "finfo" class exists on this server.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function finfo_loaded(): bool
    {
        $finfoAvailable = \class_exists(\finfo::class);

        return $finfoAvailable;
    }
1864
1865
    /**
     * Returns the first $n characters of the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                An empty string if $str is empty or $n is not positive
     */
    public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $n <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // Name the charset explicitly: without it mb_substr() uses
            // mb_internal_encoding(), which is not guaranteed to be UTF-8 and
            // would then cut multi-byte characters apart.
            return (string) \mb_substr($str, 0, $n, 'UTF-8');
        }

        return (string) self::substr($str, 0, $n, $encoding);
    }
1886
1887
    /**
     * Checks that the string is not longer than the given number of characters.
     *
     * The length is taken from "UTF8::strlen()".
     *
     * @param string $str      the string to measure
     * @param int    $box_size the maximum number of chars the string may have
     *
     * @return bool true if the string length is less than or equal to $box_size, false otherwise
     */
    public static function fits_inside(string $str, int $box_size): bool
    {
        $length = self::strlen($str);

        return $box_size >= $length;
    }
1899
1900
    /**
     * Repairs simple cases of broken UTF-8 by replacing known broken sequences.
     *
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
     *
     * Use it for UTF-8 strings that were converted from Windows-1252 as if they were ISO-8859-1
     * (ignoring the Windows-1252 chars from 80 to 9F).
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * The replacement map comes from the 'utf8_fix' data set; its keys and values are
     * split once and then kept for later calls.
     *
     * @param string $str <p>The input string</p>
     *
     * @return string
     */
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        static $search = null;
        static $replace = null;

        if ($search === null) {
            // Load the map lazily, only if nobody did it before.
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $search = \array_keys(self::$BROKEN_UTF8_FIX);
            $replace = \array_values(self::$BROKEN_UTF8_FIX);
        }

        return \str_replace($search, $replace, $str);
    }
1933
1934
    /**
     * Repairs a double (or multiple) encoded UTF8 string.
     *
     * The string is decoded and re-encoded again and again until a pass no longer
     * changes it. Arrays are processed element by element (recursively).
     *
     * @param string|string[] $str you can use a string or an array of strings
     *
     * @return string|string[]
     *                         Will return the fixed input-"array" or
     *                         the fixed input-"string"
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function fix_utf8($str)
    {
        if (\is_array($str) === true) {
            foreach ($str as &$entry) {
                $entry = self::fix_utf8($entry);
            }
            unset($entry);

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $current = (string) $str;
        $previous = '';

        // Stop as soon as one more decode/encode round-trip is a no-op.
        while ($previous !== $current) {
            $previous = $current;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $decoded = self::utf8_decode($current, true);
            $current = self::to_utf8($decoded);
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }
1976
1977
    /**
     * Determines the text direction of a single character.
     *
     * If "IntlChar" support is flagged in the support-array, the direction class reported by
     * "IntlChar::charDirection()" decides, as long as it is one of the listed LTR / RTL classes.
     * In every other case the code point of the character is looked up in a built-in
     * table of right-to-left code points.
     *
     * @param string $char
     *
     * @return string 'RTL' or 'LTR'
     */
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $directionClass = \IntlChar::charDirection($char);

            // direction classes, numbered as in the "IntlChar"-Class
            if (\in_array($directionClass, [0, 11, 12, 20], true)) {
                return 'LTR';
            }

            if (\in_array($directionClass, [1, 13, 14, 15, 21], true)) {
                return 'RTL';
            }

            // any other class: continue with the table lookup below
        }

        $codePoint = static::chr_to_decimal($char);

        // Nothing outside of 0x5be .. 0x10b7f is listed as RTL.
        if (!($codePoint >= 0x5be && $codePoint <= 0x10b7f)) {
            return 'LTR';
        }

        // Stand-alone RTL code points.
        $rtlCodePoints = [
            0x5be, 0x5c0, 0x5c3, 0x5c6,
            0x608, 0x60b, 0x60d, 0x61b,
            0x710, 0x7b1, 0x7fa,
            0x81a, 0x824, 0x828, 0x85e,
            0x200f,
            0xfb1d, 0xfb3e,
            0x10808, 0x1083c, 0x1093f, 0x10a00,
        ];

        if (\in_array($codePoint, $rtlCodePoints, true)) {
            return 'RTL';
        }

        // Inclusive [first, last] ranges of RTL code points.
        $rtlRanges = [
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            [0x800, 0x815],
            [0x830, 0x83e],
            [0x840, 0x858],
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            [0x10b78, 0x10b7f],
        ];

        foreach ($rtlRanges as $range) {
            if ($codePoint >= $range[0] && $codePoint <= $range[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2090
2091
    /**
     * Gives access to the php-support information.
     *
     * @param string|null $key
     *
     * @return mixed
     *               Return the full support-"array", if $key === null<br>
     *               return the stored value, if $key is used and available<br>
     *               otherwise return <strong>null</strong>
     */
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            // compatibility fix for old versions
            self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

            return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
        }

        return self::$SUPPORT;
    }
2115
2116
    /**
     * Guesses the file type from the first two bytes (the "magic number") of the given content.
     *
     * Warning: this method only works for some file-types (png bmp gif jpg tiff rar zip midi exe pdf)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * @param string $str      <p>The (binary) content to inspect.</p>
     * @param array  $fallback with this keys: 'ext', 'mime', 'type'
     *
     * @return array
     *               with this keys: 'ext', 'mime', 'type'; $fallback is returned if the content is
     *               shorter than two bytes or the signature is unknown
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        if ($str === '') {
            return $fallback;
        }

        $signature = \substr($str, 0, 2);
        if ($signature === false || \strlen($signature) !== 2) {
            return $fallback;
        }

        // Known signatures, keyed by the raw two bytes. Comparing the bytes themselves
        // (instead of a number glued together from their decimal values) avoids
        // collisions such as 0x07 0xAD => "7" . "173" === "71" . "73" ("GI").
        $knownSignatures = [
            '%P'       => ['pdf', 'application/pdf'],
            'MZ'       => ['exe', 'application/octet-stream'],
            'MT'       => ['midi', 'audio/x-midi'],
            'PK'       => ['zip', 'application/zip'],
            'Ra'       => ['rar', 'application/rar'],
            "\xff\xd8" => ['jpg', 'image/jpeg'],
            'GI'       => ['gif', 'image/gif'],
            'II'       => ['tiff', 'image/tiff'],
            'BM'       => ['bmp', 'image/bmp'],
            "\x89P"    => ['png', 'image/png'],
        ];

        if (!isset($knownSignatures[$signature])) {
            return $fallback;
        }

        list($ext, $mime) = $knownSignatures[$signature];

        return [
            'ext'  => $ext,
            'mime' => $mime,
            'type' => 'binary',
        ];
    }
2223
2224
    /**
     * Builds a random string of $length characters, each one picked from $possibleChars.
     *
     * The positions are drawn with "random_int()"; if that throws, "mt_rand()" is used instead.
     *
     * @param int    $length        <p>Length of the random string.</p>
     * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                An empty string if $possibleChars is empty or $length is not positive
     */
    public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
    {
        // init
        $i = 0;
        $str = '';

        //
        // add random chars
        //

        if ($encoding === 'UTF-8') {
            // The charset is named explicitly: without it the mb_*() functions use
            // mb_internal_encoding(), which is not guaranteed to be UTF-8 and would
            // then pick single bytes out of multi-byte characters.
            $maxlength = (int) \mb_strlen($possibleChars, 'UTF-8');
            if ($maxlength === 0) {
                return '';
            }

            while ($i < $length) {
                try {
                    $randInt = \random_int(0, $maxlength - 1);
                } catch (\Exception $e) {
                    /** @noinspection RandomApiMigrationInspection */
                    $randInt = \mt_rand(0, $maxlength - 1);
                }
                $char = \mb_substr($possibleChars, $randInt, 1, 'UTF-8');
                if ($char !== false) {
                    $str .= $char;
                    ++$i;
                }
            }
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $maxlength = (int) self::strlen($possibleChars, $encoding);
            if ($maxlength === 0) {
                return '';
            }

            while ($i < $length) {
                try {
                    $randInt = \random_int(0, $maxlength - 1);
                } catch (\Exception $e) {
                    /** @noinspection RandomApiMigrationInspection */
                    $randInt = \mt_rand(0, $maxlength - 1);
                }
                $char = self::substr($possibleChars, $randInt, 1, $encoding);
                // only count characters that could really be extracted
                if ($char !== false) {
                    $str .= $char;
                    ++$i;
                }
            }
        }

        return $str;
    }
2285
2286
    /**
     * Generate a (practically) unique string.
     *
     * The value is built from a random integer, the current session id, the
     * remote and server address (when present in $_SERVER) and the optional
     * extra entropy, all passed as prefix to "uniqid()". With $md5 enabled the
     * result is additionally hashed, which gives a fixed-length hex string.
     *
     * NOTE: "uniqid()" and "md5()" are not cryptographically secure, so the
     * result should not be used as a secret token.
     *
     * @param int|string $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     */
    public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
    {
        // random_int() throws if no source of randomness is available; use the
        // same fallback as "UTF8::get_random_string()" instead of failing.
        try {
            $randInt = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            /** @noinspection RandomApiMigrationInspection */
            $randInt = \mt_rand(0, \mt_getrandmax());
        }

        $uniqueHelper = $randInt .
                        \session_id() .
                        ($_SERVER['REMOTE_ADDR'] ?? '') .
                        ($_SERVER['SERVER_ADDR'] ?? '') .
                        $entropyExtra;

        $uniqueString = \uniqid($uniqueHelper, true);

        if ($md5) {
            $uniqueString = \md5($uniqueString . $uniqueHelper);
        }

        return $uniqueString;
    }
2308
2309
    /**
     * Deprecated camelCase alias that forwards to "UTF8::string_has_bom()".
     *
     * @param string $str <p>The string to inspect.</p>
     *
     * @return bool the result of "UTF8::string_has_bom()"
     *
     * @see UTF8::string_has_bom()
     * @deprecated <p>use "UTF8::string_has_bom()"</p>
     */
    public static function hasBom(string $str): bool
    {
        $hasBom = self::string_has_bom($str);

        return $hasBom;
    }
2323
2324
    /**
     * Check whether the string contains at least one lower case character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool true if a lower case character was found, false otherwise
     */
    public static function has_lowercase(string $str): bool
    {
        $pattern = '.*[[:lower:]]';

        // without mbstring the class' own pattern matcher is used
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2340
2341
    /**
     * Check whether the string contains at least one upper case character.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool true if an upper case character was found, false otherwise
     */
    public static function has_uppercase(string $str): bool
    {
        $pattern = '.*[[:upper:]]';

        // without mbstring the class' own pattern matcher is used
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2357
2358
    /**
     * Turn a hexadecimal value into the matching UTF-8 character.
     *
     * The value is converted with "hexdec()" and handed to "UTF8::decimal_to_chr()".
     *
     * @param string $hexdec <p>The hexadecimal value.</p>
     *
     * @return false|string one single UTF-8 character
     */
    public static function hex_to_chr(string $hexdec)
    {
        $codePoint = \hexdec($hexdec);

        return self::decimal_to_chr($codePoint);
    }
2369
2370
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * Accepted forms are 4 to 6 hexadecimal digits, optionally prefixed by
     * "\u" or "U+" (e.g. "00f1", "\u00f1", "U+00f1").
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * @param string $hexDec <p>The hexadecimal code point representation.</p>
     *
     * @return false|int the code point, or false on failure
     */
    public static function hex_to_int($hexDec)
    {
        // init
        $hexDec = (string) $hexDec;

        if ($hexDec === '') {
            return false;
        }

        // Only real hexadecimal digits are allowed: letters beyond "f" would be
        // silently truncated by intval() (e.g. "zzzz" => 0) instead of failing.
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexDec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2394
2395
    /**
     * Short alias that forwards all arguments to "UTF8::html_entity_decode()".
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $flags    [optional] <p>Passed through unchanged.</p>
     * @param string $encoding [optional] <p>Passed through unchanged.</p>
     *
     * @return string the result of "UTF8::html_entity_decode()"
     *
     * @see UTF8::html_entity_decode()
     */
    public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2410
2411
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // with $keepAsciiChars only code points >= 0x80 are converted
            $startCode = $keepAsciiChars === true ? 0x80 : 0x00;

            // A convmap entry is exactly [start, end, offset, mask]. A trailing
            // extra element makes PHP >= 8.0 throw a ValueError, while older
            // versions simply ignored it.
            $convmap = [$startCode, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                return \mb_encode_numericentity($str, $convmap);
            }

            return \mb_encode_numericentity($str, $convmap, $encoding);
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keepAsciiChars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keepAsciiChars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2467
2468
    /**
2469
     * UTF-8 version of html_entity_decode()
2470
     *
2471
     * The reason we are not using html_entity_decode() by itself is because
2472
     * while it is not technically correct to leave out the semicolon
2473
     * at the end of an entity most browsers will still interpret the entity
2474
     * correctly. html_entity_decode() does not convert entities without
2475
     * semicolons, so we are left with our own little solution here. Bummer.
2476
     *
2477
     * Convert all HTML entities to their applicable characters
2478
     *
2479
     * INFO: opposite to UTF8::html_encode()
2480
     *
2481
     * @see http://php.net/manual/en/function.html-entity-decode.php
2482
     *
2483
     * @param string $str      <p>
2484
     *                         The input string.
2485
     *                         </p>
2486
     * @param int    $flags    [optional] <p>
2487
     *                         A bitmask of one or more of the following flags, which specify how to handle quotes
2488
     *                         and which document type to use. If null is passed, ENT_QUOTES | ENT_HTML5 is used.
2489
     *                         <table>
2490
     *                         Available <i>flags</i> constants
2491
     *                         <tr valign="top">
2492
     *                         <td>Constant Name</td>
2493
     *                         <td>Description</td>
2494
     *                         </tr>
2495
     *                         <tr valign="top">
2496
     *                         <td><b>ENT_COMPAT</b></td>
2497
     *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2498
     *                         </tr>
2499
     *                         <tr valign="top">
2500
     *                         <td><b>ENT_QUOTES</b></td>
2501
     *                         <td>Will convert both double and single quotes.</td>
2502
     *                         </tr>
2503
     *                         <tr valign="top">
2504
     *                         <td><b>ENT_NOQUOTES</b></td>
2505
     *                         <td>Will leave both double and single quotes unconverted.</td>
2506
     *                         </tr>
2507
     *                         <tr valign="top">
2508
     *                         <td><b>ENT_HTML401</b></td>
2509
     *                         <td>
2510
     *                         Handle code as HTML 4.01.
2511
     *                         </td>
2512
     *                         </tr>
2513
     *                         <tr valign="top">
2514
     *                         <td><b>ENT_XML1</b></td>
2515
     *                         <td>
2516
     *                         Handle code as XML 1.
2517
     *                         </td>
2518
     *                         </tr>
2519
     *                         <tr valign="top">
2520
     *                         <td><b>ENT_XHTML</b></td>
2521
     *                         <td>
2522
     *                         Handle code as XHTML.
2523
     *                         </td>
2524
     *                         </tr>
2525
     *                         <tr valign="top">
2526
     *                         <td><b>ENT_HTML5</b></td>
2527
     *                         <td>
2528
     *                         Handle code as HTML 5.
2529
     *                         </td>
2530
     *                         </tr>
2531
     *                         </table>
2532
     *                         </p>
2533
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2534
     *
2535
     * @return string the decoded string
2536
     */
2537 46
    public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        // nothing to decode: too short to hold an entity, or no "&" at all
        if (
            !isset($str[3]) // examples: &; || &x;
            ||
            \strpos($str, '&') === false // no "&"
        ) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // A convmap entry is exactly [start, end, offset, mask]. A trailing
        // extra element makes PHP >= 8.0 throw a ValueError, while older
        // versions simply ignored it.
        $convmap = [0x80, 0xfffff, 0, 0xfffff];

        // Repeat until a pass no longer changes the string, so that entities
        // which only appear after a previous decoding step are decoded as well.
        do {
            $str_compare = $str;

            // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
            if (self::$SUPPORT['mbstring'] === true) {
                if ($encoding === 'UTF-8') {
                    $str = \mb_decode_numericentity(
                        $str,
                        $convmap
                    );
                } else {
                    $str = \mb_decode_numericentity(
                        $str,
                        $convmap,
                        $encoding
                    );
                }
            } else {
                $str = (string) \preg_replace_callback(
                    "/&#\d{2,6};/",
                    /**
                     * @param string[] $matches
                     *
                     * @return string
                     */
                    static function (array $matches) use ($encoding): string {
                        $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');
                        // quotes are left encoded here; the native call below
                        // decides about them according to $flags
                        if ($returnTmp !== '"' && $returnTmp !== "'") {
                            return $returnTmp;
                        }

                        return $matches[0];
                    },
                    $str
                );
            }

            if (\strpos($str, '&') !== false) {
                if (\strpos($str, '&#') !== false) {
                    // decode also numeric & UTF16 two byte entities
                    // (append the missing ";" so that the native function accepts them)
                    $str = (string) \preg_replace(
                        '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                        '$1;',
                        $str
                    );
                }

                $str = \html_entity_decode(
                    $str,
                    $flags,
                    $encoding
                );
            }
        } while ($str_compare !== $str);

        return $str;
    }
2624
2625
    /**
     * Escape a string for HTML output via "UTF8::htmlspecialchars()".
     *
     * Both quote styles are converted (ENT_QUOTES) and invalid code unit
     * sequences are substituted (ENT_SUBSTITUTE).
     *
     * @param string $str      <p>The string to escape.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }
2641
2642
    /**
     * Remove empty html-tag.
     *
     * e.g.: <tag></tag>
     *
     * A tag pair counts as empty if nothing or only whitespace is between the
     * opening and the closing tag. If the regex engine fails (e.g. because the
     * input is not valid UTF-8), the input is returned unchanged.
     *
     * @param string $str
     *
     * @return string
     */
    public static function html_stripe_empty_tags(string $str): string
    {
        $stripped = \preg_replace(
            "/<[^\/>]*>(([\s]?)*|)<\/[^>]*>/u",
            '',
            $str
        );

        // preg_replace() returns null on failure; casting that to a string
        // would silently throw away the whole input.
        return $stripped ?? $str;
    }
2659
2660
    /**
2661
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2662
     *
2663
     * @see http://php.net/manual/en/function.htmlentities.php
2664
     *
2665
     * @param string $str           <p>
2666
     *                              The input string.
2667
     *                              </p>
2668
     * @param int    $flags         [optional] <p>
2669
     *                              A bitmask of one or more of the following flags, which specify how to handle
2670
     *                              quotes, invalid code unit sequences and the used document type. The default is
2671
     *                              ENT_COMPAT | ENT_HTML401.
2672
     *                              <table>
2673
     *                              Available <i>flags</i> constants
2674
     *                              <tr valign="top">
2675
     *                              <td>Constant Name</td>
2676
     *                              <td>Description</td>
2677
     *                              </tr>
2678
     *                              <tr valign="top">
2679
     *                              <td><b>ENT_COMPAT</b></td>
2680
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2681
     *                              </tr>
2682
     *                              <tr valign="top">
2683
     *                              <td><b>ENT_QUOTES</b></td>
2684
     *                              <td>Will convert both double and single quotes.</td>
2685
     *                              </tr>
2686
     *                              <tr valign="top">
2687
     *                              <td><b>ENT_NOQUOTES</b></td>
2688
     *                              <td>Will leave both double and single quotes unconverted.</td>
2689
     *                              </tr>
2690
     *                              <tr valign="top">
2691
     *                              <td><b>ENT_IGNORE</b></td>
2692
     *                              <td>
2693
     *                              Silently discard invalid code unit sequences instead of returning
2694
     *                              an empty string. Using this flag is discouraged as it
2695
     *                              may have security implications.
2696
     *                              </td>
2697
     *                              </tr>
2698
     *                              <tr valign="top">
2699
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2700
     *                              <td>
2701
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2702
     *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty
2703
     *                              string.
2704
     *                              </td>
2705
     *                              </tr>
2706
     *                              <tr valign="top">
2707
     *                              <td><b>ENT_DISALLOWED</b></td>
2708
     *                              <td>
2709
     *                              Replace invalid code points for the given document type with a
2710
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2711
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2712
     *                              instance, to ensure the well-formedness of XML documents with
2713
     *                              embedded external content.
2714
     *                              </td>
2715
     *                              </tr>
2716
     *                              <tr valign="top">
2717
     *                              <td><b>ENT_HTML401</b></td>
2718
     *                              <td>
2719
     *                              Handle code as HTML 4.01.
2720
     *                              </td>
2721
     *                              </tr>
2722
     *                              <tr valign="top">
2723
     *                              <td><b>ENT_XML1</b></td>
2724
     *                              <td>
2725
     *                              Handle code as XML 1.
2726
     *                              </td>
2727
     *                              </tr>
2728
     *                              <tr valign="top">
2729
     *                              <td><b>ENT_XHTML</b></td>
2730
     *                              <td>
2731
     *                              Handle code as XHTML.
2732
     *                              </td>
2733
     *                              </tr>
2734
     *                              <tr valign="top">
2735
     *                              <td><b>ENT_HTML5</b></td>
2736
     *                              <td>
2737
     *                              Handle code as HTML 5.
2738
     *                              </td>
2739
     *                              </tr>
2740
     *                              </table>
2741
     *                              </p>
2742
     * @param string $encoding      [optional] <p>
2743
     *                              Like <b>htmlspecialchars</b>,
2744
     *                              <b>htmlentities</b> takes an optional third argument
2745
     *                              <i>encoding</i> which defines encoding used in
2746
     *                              conversion.
2747
     *                              Although this argument is technically optional, you are highly
2748
     *                              encouraged to specify the correct value for your code.
2749
     *                              </p>
2750
     * @param bool   $double_encode [optional] <p>
2751
     *                              When <i>double_encode</i> is turned off PHP will not
2752
     *                              encode existing html entities. The default is to convert everything.
2753
     *                              </p>
2754
     *
2755
     * @return string
2756
     *                <p>
2757
     *                The encoded string.
2758
     *                <br><br>
2759
     *                If the input <i>string</i> contains an invalid code unit
2760
     *                sequence within the given <i>encoding</i> an empty string
2761
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2762
     *                <b>ENT_SUBSTITUTE</b> flags are set.
2763
     *                </p>
2764
     */
2765 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // the two most common charsets are used as given, everything else is normalized first
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        /**
         * The native function leaves backslashes untouched, so they are
         * replaced by their numeric html entity here.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace('\\', '&#92;', $encoded);

        // finally convert the remaining non-ASCII chars into numbered entities
        return self::html_encode($encoded, true, $encoding);
    }
2789
2790
    /**
2791
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2792
     *
2793
     * INFO: Take a look at "UTF8::htmlentities()"
2794
     *
2795
     * @see http://php.net/manual/en/function.htmlspecialchars.php
2796
     *
2797
     * @param string $str           <p>
2798
     *                              The string being converted.
2799
     *                              </p>
2800
     * @param int    $flags         [optional] <p>
2801
     *                              A bitmask of one or more of the following flags, which specify how to handle
2802
     *                              quotes, invalid code unit sequences and the used document type. The default is
2803
     *                              ENT_COMPAT | ENT_HTML401.
2804
     *                              <table>
2805
     *                              Available <i>flags</i> constants
2806
     *                              <tr valign="top">
2807
     *                              <td>Constant Name</td>
2808
     *                              <td>Description</td>
2809
     *                              </tr>
2810
     *                              <tr valign="top">
2811
     *                              <td><b>ENT_COMPAT</b></td>
2812
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2813
     *                              </tr>
2814
     *                              <tr valign="top">
2815
     *                              <td><b>ENT_QUOTES</b></td>
2816
     *                              <td>Will convert both double and single quotes.</td>
2817
     *                              </tr>
2818
     *                              <tr valign="top">
2819
     *                              <td><b>ENT_NOQUOTES</b></td>
2820
     *                              <td>Will leave both double and single quotes unconverted.</td>
2821
     *                              </tr>
2822
     *                              <tr valign="top">
2823
     *                              <td><b>ENT_IGNORE</b></td>
2824
     *                              <td>
2825
     *                              Silently discard invalid code unit sequences instead of returning
2826
     *                              an empty string. Using this flag is discouraged as it
2827
     *                              may have security implications.
2828
     *                              </td>
2829
     *                              </tr>
2830
     *                              <tr valign="top">
2831
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2832
     *                              <td>
2833
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2834
     *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty
2835
     *                              string.
2836
     *                              </td>
2837
     *                              </tr>
2838
     *                              <tr valign="top">
2839
     *                              <td><b>ENT_DISALLOWED</b></td>
2840
     *                              <td>
2841
     *                              Replace invalid code points for the given document type with a
2842
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2843
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2844
     *                              instance, to ensure the well-formedness of XML documents with
2845
     *                              embedded external content.
2846
     *                              </td>
2847
     *                              </tr>
2848
     *                              <tr valign="top">
2849
     *                              <td><b>ENT_HTML401</b></td>
2850
     *                              <td>
2851
     *                              Handle code as HTML 4.01.
2852
     *                              </td>
2853
     *                              </tr>
2854
     *                              <tr valign="top">
2855
     *                              <td><b>ENT_XML1</b></td>
2856
     *                              <td>
2857
     *                              Handle code as XML 1.
2858
     *                              </td>
2859
     *                              </tr>
2860
     *                              <tr valign="top">
2861
     *                              <td><b>ENT_XHTML</b></td>
2862
     *                              <td>
2863
     *                              Handle code as XHTML.
2864
     *                              </td>
2865
     *                              </tr>
2866
     *                              <tr valign="top">
2867
     *                              <td><b>ENT_HTML5</b></td>
2868
     *                              <td>
2869
     *                              Handle code as HTML 5.
2870
     *                              </td>
2871
     *                              </tr>
2872
     *                              </table>
2873
     *                              </p>
2874
     * @param string $encoding      [optional] <p>
2875
     *                              Defines encoding used in conversion.
2876
     *                              </p>
2877
     *                              <p>
2878
     *                              For the purposes of this function, the encodings
2879
     *                              ISO-8859-1, ISO-8859-15,
2880
     *                              UTF-8, cp866,
2881
     *                              cp1251, cp1252, and
2882
     *                              KOI8-R are effectively equivalent, provided the
2883
     *                              <i>string</i> itself is valid for the encoding, as
2884
     *                              the characters affected by <b>htmlspecialchars</b> occupy
2885
     *                              the same positions in all of these encodings.
2886
     *                              </p>
2887
     * @param bool   $double_encode [optional] <p>
2888
     *                              When <i>double_encode</i> is turned off PHP will not
2889
     *                              encode existing html entities, the default is to convert everything.
2890
     *                              </p>
2891
     *
2892
     * @return string
     *                <p>
     *                The converted string.
     *                <br><br>
     *                If the input <i>string</i> contains an invalid code unit
     *                sequence within the given <i>encoding</i> an empty string
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
     *                <b>ENT_SUBSTITUTE</b> flags are set.
     *                </p>
2899
     */
2900 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // the two most common charsets are used as given, everything else is normalized first
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $converted;
    }
2912
2913
    /**
     * Tell whether the "iconv" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function iconv_loaded(): bool
    {
        $isLoaded = \extension_loaded('iconv');

        return $isLoaded;
    }
2923
2924
    /**
     * Alias that forwards to "UTF8::decimal_to_chr()".
     *
     * @param mixed $int <p>Passed through unchanged.</p>
     *
     * @return string the result of "UTF8::decimal_to_chr()"
     *
     * @see UTF8::decimal_to_chr()
     */
    public static function int_to_chr($int): string
    {
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
2937
2938
    /**
     * Converts Integer to hexadecimal U+xxxx code point representation.
     *
     * The hexadecimal digits are lower case and left-padded with zeros to at
     * least four digits, e.g. 241 => "U+00f1".
     *
     * INFO: opposite to UTF8::hex_to_int()
     *
     * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
     * @param string $pfix [optional] <p>Prefix that is put in front of the digits.</p>
     *
     * @return string the prefix followed by the hexadecimal code point
     */
    public static function int_to_hex(int $int, string $pfix = 'U+'): string
    {
        // pad short values with leading zeros, longer values stay as they are
        $digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $pfix . $digits;
    }
2956
2957
    /**
     * Reports whether the "IntlChar" class exists.
     *
     * @return bool
     *              <strong>true</strong> when the class exists, <strong>false</strong> otherwise
     */
    public static function intlChar_loaded(): bool
    {
        $hasIntlChar = \class_exists('IntlChar');

        return $hasIntlChar;
    }
2967
2968
    /**
     * Reports whether the "intl" PHP extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> when the extension is loaded, <strong>false</strong> otherwise
     */
    public static function intl_loaded(): bool
    {
        $isLoaded = \extension_loaded('intl');

        return $isLoaded;
    }
2978
2979
    /**
     * Deprecated alias: forwards to "UTF8::is_ascii()".
     *
     * @param string $str <p>The string handed to is_ascii().</p>
     *
     * @return bool
     *              the result of is_ascii()
     *
     * @see UTF8::is_ascii()
     * @deprecated <p>use "UTF8::is_ascii()"</p>
     */
    public static function isAscii(string $str): bool
    {
        return self::is_ascii($str);
    }
2993
2994
    /**
     * Deprecated alias: forwards to "UTF8::is_base64()" with its default options.
     *
     * @param mixed|string $str <p>The value handed to is_base64().</p>
     *
     * @return bool
     *              the result of is_base64()
     *
     * @see UTF8::is_base64()
     * @deprecated <p>use "UTF8::is_base64()"</p>
     */
    public static function isBase64($str): bool
    {
        return self::is_base64($str);
    }
3008
3009
    /**
     * Deprecated alias: forwards both arguments to "UTF8::is_binary()".
     *
     * @param mixed $str    <p>The value handed to is_binary().</p>
     * @param bool  $strict <p>Passed through as the "strict" flag of is_binary().</p>
     *
     * @return bool
     *              the result of is_binary()
     *
     * @see UTF8::is_binary()
     * @deprecated <p>use "UTF8::is_binary()"</p>
     */
    public static function isBinary($str, $strict = false): bool
    {
        return self::is_binary($str, $strict);
    }
3024
3025
    /**
     * Deprecated alias: forwards to "UTF8::is_bom()".
     *
     * @param string $utf8_chr <p>The string handed to is_bom().</p>
     *
     * @return bool
     *              the result of is_bom()
     *
     * @see UTF8::is_bom()
     * @deprecated <p>use "UTF8::is_bom()"</p>
     */
    public static function isBom(string $utf8_chr): bool
    {
        return self::is_bom($utf8_chr);
    }
3039
3040
    /**
     * Deprecated alias: forwards to "UTF8::is_html()".
     *
     * @param string $str <p>The string handed to is_html().</p>
     *
     * @return bool
     *              the result of is_html()
     *
     * @see UTF8::is_html()
     * @deprecated <p>use "UTF8::is_html()"</p>
     */
    public static function isHtml(string $str): bool
    {
        return self::is_html($str);
    }
3054
3055
    /**
     * Deprecated alias: forwards to "UTF8::is_json()" with its default options.
     *
     * @param string $str <p>The string handed to is_json().</p>
     *
     * @return bool
     *              the result of is_json()
     *
     * @see UTF8::is_json()
     * @deprecated <p>use "UTF8::is_json()"</p>
     */
    public static function isJson(string $str): bool
    {
        return self::is_json($str);
    }
3069
3070
    /**
     * Deprecated alias: forwards to "UTF8::is_utf16()" with its default options.
     *
     * @param mixed $str <p>The value handed to is_utf16().</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     *
     * @see UTF8::is_utf16()
     * @deprecated <p>use "UTF8::is_utf16()"</p>
     */
    public static function isUtf16($str)
    {
        return self::is_utf16($str);
    }
3087
3088
    /**
     * Deprecated alias: forwards to "UTF8::is_utf32()" with its default options.
     *
     * @param mixed $str <p>The value handed to is_utf32().</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     *
     * @see UTF8::is_utf32()
     * @deprecated <p>use "UTF8::is_utf32()"</p>
     */
    public static function isUtf32($str)
    {
        return self::is_utf32($str);
    }
3105
3106
    /**
     * Deprecated alias: forwards both arguments to "UTF8::is_utf8()".
     *
     * @param string|string[] $str    <p>The value handed to is_utf8().</p>
     * @param bool            $strict <p>Passed through as the "strict" flag of is_utf8().</p>
     *
     * @return bool
     *              the result of is_utf8()
     *
     * @see UTF8::is_utf8()
     * @deprecated <p>use "UTF8::is_utf8()"</p>
     */
    public static function isUtf8($str, $strict = false): bool
    {
        return self::is_utf8($str, $strict);
    }
3121
3122
    /**
     * Tests the whole string against the POSIX class [[:alpha:]].
     *
     * With mbstring support the check is done by mb_ereg_match(), otherwise it
     * is delegated to UTF8::str_matches_pattern() with the same pattern.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only alphabetic chars
     */
    public static function is_alpha(string $str): bool
    {
        $pattern = '^[[:alpha:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3139
3140
    /**
     * Tests the whole string against the POSIX class [[:alnum:]].
     *
     * With mbstring support the check is done by mb_ereg_match(), otherwise it
     * is delegated to UTF8::str_matches_pattern() with the same pattern.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only alphanumeric chars
     */
    public static function is_alphanumeric(string $str): bool
    {
        $pattern = '^[[:alnum:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3157
3158
    /**
     * Checks whether a string consists only of the accepted ASCII bytes.
     *
     * Accepted are the printable range 0x20-0x7E plus the bytes 0x09, 0x0A,
     * 0x0D, 0x10 and 0x13; any other byte makes the check fail. An empty
     * string is reported as ASCII.
     *
     * NOTE(review): 0x10 and 0x13 are allowed next to tab / LF / CR - confirm
     * that this is intended and not decimal 10 / 13 written as hex.
     *
     * @param string $str <p>The string to check.</p>
     *
     * @return bool
     *              <strong>true</strong> if it is ASCII<br>
     *              <strong>false</strong> otherwise
     */
    public static function is_ascii(string $str): bool
    {
        return $str === ''
               ||
               !\preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);
    }
3175
3176
    /**
     * Checks whether a value is a base64 encoded string.
     *
     * A string passes when strict base64 decoding succeeds and re-encoding the
     * decoded bytes yields exactly the input again. Non-string values never pass.
     *
     * @param mixed|string $str                <p>The input string.</p>
     * @param bool         $emptyStringIsValid [optional] <p>Is an empty string valid base64 or not?</p>
     *
     * @return bool whether or not $str is base64 encoded
     */
    public static function is_base64($str, $emptyStringIsValid = false): bool
    {
        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (!\is_string($str)) {
            return false;
        }

        if ($str === '') {
            // '' decodes to '' and encodes back to '', so only the flag decides
            return $emptyStringIsValid !== false;
        }

        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        return \base64_encode($decoded) === $str;
    }
3201
3202
    /**
     * Heuristic check whether the input looks like binary data.
     *
     * The input is cast to string and then tested in this order:
     * an empty string is never binary; a string made up only of "0" / "1"
     * characters is binary; a "binary" type reported by UTF8::get_file_type()
     * is binary; more than 25% NUL bytes is binary. In strict mode the
     * encoding reported by ext-fileinfo is consulted as a last step.
     *
     * @param mixed $input  <p>The value to inspect.</p>
     * @param bool  $strict <p>Also ask ext-fileinfo when the other checks did not match.</p>
     *
     * @throws \RuntimeException if strict mode is reached while ext-fileinfo is flagged as unavailable
     *
     * @return bool
     */
    public static function is_binary($input, bool $strict = false): bool
    {
        $bytes = (string) $input;
        if ($bytes === '') {
            return false;
        }

        if (\preg_match('~^[01]+$~', $bytes)) {
            return true;
        }

        $fileType = self::get_file_type($bytes);
        if ($fileType['type'] === 'binary') {
            return true;
        }

        // share of NUL bytes within the whole input
        $byteCount = \strlen($bytes);
        $nulCount = \substr_count($bytes, "\x0", 0, $byteCount);
        if (($nulCount / $byteCount) > 0.25) {
            return true;
        }

        if ($strict !== true) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $detectedEncoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($bytes);

        return $detectedEncoding === 'binary';
    }
3246
3247
    /**
     * Checks whether a file looks binary, judged by its first 512 bytes.
     *
     * The leading block is read in binary mode and handed to
     * UTF8::is_binary() in strict mode. If the file cannot be opened, or the
     * block read is an empty string, the result is false.
     *
     * @param string $file <p>Path passed to fopen().</p>
     *
     * @return bool
     */
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (!\is_resource($handle)) {
            return false;
        }

        $head = \fread($handle, 512);
        \fclose($handle);

        if ($head === '') {
            return false;
        }

        return self::is_binary($head, true);
    }
3271
3272
    /**
     * Tests the whole string against the POSIX class [[:space:]].
     *
     * With mbstring support the check is done by mb_ereg_match(), otherwise it
     * is delegated to UTF8::str_matches_pattern() with the same pattern.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only whitespace characters
     */
    public static function is_blank(string $str): bool
    {
        $pattern = '^[[:space:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3289
3290
    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * The input is compared strictly (type and value) against the keys of
     * self::$BOM, so it must be exactly one of those keys - a longer string
     * that merely starts with a BOM does not match.
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise
     */
    public static function is_bom($str): bool
    {
        // Only the keys are needed; looking them up by value avoids iterating
        // the shared static array by reference (which left a dangling
        // reference and an unused loop variable behind).
        return \in_array($str, \array_keys(self::$BOM), true);
    }
3311
3312
    /**
     * Determine whether the given value is considered to be empty.
     *
     * The value is empty when it converts to boolean FALSE, e.g. "", "0", 0,
     * null or an empty array.
     *
     * @param mixed $str <p>The value to check.</p>
     *
     * @return bool whether or not $str is empty
     */
    public static function is_empty($str): bool
    {
        // the parameter always exists here, so empty() reduces to a falsiness test
        return !$str;
    }
3326
3327
    /**
     * Tests the whole string against the POSIX class [[:xdigit:]].
     *
     * With mbstring support the check is done by mb_ereg_match(), otherwise it
     * is delegated to UTF8::str_matches_pattern() with the same pattern.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only hexadecimal chars
     */
    public static function is_hexadecimal(string $str): bool
    {
        $pattern = '^[[:xdigit:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3344
3345
    /**
     * Check if the string contains at least one html-tag, e.g. "<b>" or "<br/>".
     *
     * The tag pattern is applied in PCRE UTF-8 mode; an empty string never
     * contains a tag.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if a tag was found, <strong>false</strong> otherwise
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // 1 = a tag was found; 0 (no match) and false (pcre error) both mean "no tag"
        $found = \preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/u", $str);

        return $found === 1;
    }
3365
3366
    /**
     * Try to check if "$str" is a json-string.
     *
     * The string is decoded via UTF8::json_decode(). A null result is only
     * accepted when the input itself spells "null" (compared case-insensitively
     * through strtoupper()); finally json_last_error() must report no error.
     *
     * @param string $str                              <p>The input string.</p>
     * @param bool   $onlyArrayOrObjectResultsAreValid [optional] <p>Only array and objects are valid json results.</p>
     *
     * @throws \RuntimeException if ext-json is flagged as unavailable
     *
     * @return bool
     */
    public static function is_json(string $str, $onlyArrayOrObjectResultsAreValid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        $isUnexpectedNull = $decoded === null && \strtoupper($str) !== 'NULL';
        if ($isUnexpectedNull) {
            return false;
        }

        $isContainer = \is_array($decoded) || \is_object($decoded);
        if ($onlyArrayOrObjectResultsAreValid === true && !$isContainer) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3402
3403
    /**
     * Tests the whole string against the POSIX class [[:lower:]].
     *
     * With mbstring support the check is done by mb_ereg_match(), otherwise it
     * is delegated to UTF8::str_matches_pattern() with the same pattern.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only lower case characters
     */
    public static function is_lowercase(string $str): bool
    {
        $pattern = '^[[:lower:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3417
3418
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The string is probed with unserialize(). Because unserialize() returns
     * false both on failure and for a serialized boolean false, the literal
     * "b:0;" is recognised up front.
     *
     * SECURITY: the input may be untrusted, so unserialize() is called with
     * "allowed_classes" => false. Serialized objects are then materialised as
     * __PHP_Incomplete_Class instead of real instances, so no magic methods of
     * project classes can be triggered just by probing a string. The boolean
     * result is unaffected.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not $str is serialized
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        if ($str === 'b:0;') {
            return true;
        }

        // "@": unserialize() raises a notice / warning for non-serialized input,
        // which is the expected "no" case here.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        /** @noinspection UnserializeExploitsInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3437
3438
    /**
     * Tests the whole string against the POSIX class [[:upper:]].
     *
     * With mbstring support the check is done by mb_ereg_match(), otherwise it
     * is delegated to UTF8::str_matches_pattern() with the same pattern.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only upper case characters
     */
    public static function is_uppercase(string $str): bool
    {
        $pattern = '^[[:upper:]]*$';

        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3456
3457
    /**
     * Check if the string is UTF-16.
     *
     * After the optional binary pre-check and BOM removal, the input is
     * interpreted once as UTF-16LE and once as UTF-16BE. For each
     * interpretation that survives a UTF-8 round trip unchanged, the characters
     * of the converted text that also occur in the character list of the input
     * are counted; the byte order with the higher count wins. Equal counts
     * (including two failed interpretations) give false.
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary <p>Return false right away when UTF8::is_binary() (strict) says the input is not binary.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // hit counter per candidate byte order (little endian is tested first)
        $hits = ['UTF-16LE' => 0, 'UTF-16BE' => 0];

        foreach (\array_keys($hits) as $candidate) {
            $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $candidate);
            if (!$asUtf8) {
                continue;
            }

            // the interpretation only counts if it survives a round trip unchanged
            $backToCandidate = \mb_convert_encoding($asUtf8, $candidate, 'UTF-8');
            $roundTrip = \mb_convert_encoding($backToCandidate, 'UTF-8', $candidate);
            if ($roundTrip !== $asUtf8) {
                continue;
            }

            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true, false);
            }

            // only the keys (the characters) are needed, so iterate them by value
            // instead of walking the function result by reference
            foreach (\array_keys(self::count_chars($roundTrip)) as $char) {
                if (\in_array($char, $strChars, true) === true) {
                    ++$hits[$candidate];
                }
            }
        }

        if ($hits['UTF-16BE'] === $hits['UTF-16LE']) {
            return false;
        }

        return $hits['UTF-16LE'] > $hits['UTF-16BE'] ? 1 : 2;
    }
3534
3535
    /**
     * Check if the string is UTF-32.
     *
     * After the optional binary pre-check and BOM removal, the input is
     * interpreted once as UTF-32LE and once as UTF-32BE. For each
     * interpretation that survives a UTF-8 round trip unchanged, the characters
     * of the converted text that also occur in the character list of the input
     * are counted; the byte order with the higher count wins. Equal counts
     * (including two failed interpretations) give false.
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary <p>Return false right away when UTF8::is_binary() (strict) says the input is not binary.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // hit counter per candidate byte order (little endian is tested first)
        $hits = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

        foreach (\array_keys($hits) as $candidate) {
            $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $candidate);
            if (!$asUtf8) {
                continue;
            }

            // the interpretation only counts if it survives a round trip unchanged
            $backToCandidate = \mb_convert_encoding($asUtf8, $candidate, 'UTF-8');
            $roundTrip = \mb_convert_encoding($backToCandidate, 'UTF-8', $candidate);
            if ($roundTrip !== $asUtf8) {
                continue;
            }

            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true, false);
            }

            // only the keys (the characters) are needed, so iterate them by value
            // instead of walking the function result by reference
            foreach (\array_keys(self::count_chars($roundTrip)) as $char) {
                if (\in_array($char, $strChars, true) === true) {
                    ++$hits[$candidate];
                }
            }
        }

        if ($hits['UTF-32BE'] === $hits['UTF-32LE']) {
            return false;
        }

        return $hits['UTF-32LE'] > $hits['UTF-32BE'] ? 1 : 2;
    }
3612
3613
    /**
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
     *
     * A sequence is rejected when it is not in shortest form, encodes a surrogate
     * (U+D800..U+DFFF), encodes a code point above U+10FFFF, uses the obsolete
     * 5/6 octet forms, or is cut off (also at the very end of the string).
     *
     * @see http://hsivonen.iki.fi/php-utf8/
     *
     * @param string|string[] $str    <p>The string (or an array of strings) to be checked.</p>
     * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     *              <strong>true</strong> if the input is valid UTF-8 (an empty string and an empty array are valid),
     *              <strong>false</strong> otherwise
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str) === true) {
            // every element must be valid; iterate by value, nothing is modified
            foreach ($str as $v) {
                if (self::is_utf8($v, $strict) === false) {
                    return false;
                }
            }

            return true;
        }

        // Work on a real string from here on: the length check and the byte access
        // below must see the same value (e.g. for non "strict_types" callers).
        $str = (string) $str;

        if ($str === '') {
            return true;
        }

        if ($strict === true) {
            $isBinary = self::is_binary($str, true);

            if ($isBinary && self::is_utf16($str, false) !== false) {
                return false;
            }

            if ($isBinary && self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        // NOTE(review): assumes pcre_utf8_support() reports whether PCRE was built
        // with UTF-8 ("/u") support - confirm against its implementation.
        if (self::pcre_utf8_support() === true) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences.
            return \preg_match('/^.{1}/us', $str) === 1;
        }

        // Fallback without PCRE UTF-8 support: validate octet by octet.
        $mState = 0; // number of continuation octets still expected for the current character
        $mUcs4 = 0; // code point assembled so far
        $mBytes = 1; // total number of octets of the current sequence

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $len = \strlen($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $len; ++$i) {
            $in = self::$ORD[$str[$i]];

            if ($mState === 0) {
                // When mState is zero we expect either a US-ASCII character or
                // the first octet of a multi-octet sequence.
                if ((0x80 & $in) === 0) {
                    // US-ASCII, pass straight through.
                    $mBytes = 1;
                } elseif ((0xE0 & $in) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $mUcs4 = ($in & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;
                } elseif ((0xF0 & $in) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $mUcs4 = ($in & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;
                } elseif ((0xF8 & $in) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $mUcs4 = ($in & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;
                } elseif ((0xFC & $in) === 0xF8) {
                    // First octet of 5 octet sequence.
                    //
                    // This is illegal because the encoded code point must be either
                    // (a) not the shortest form or
                    // (b) outside the Unicode range of 0-0x10FFFF.
                    // Rather than trying to resynchronize, we carry on until the end
                    // of the sequence and let the later error handling code catch it.
                    $mUcs4 = ($in & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;
                } elseif ((0xFE & $in) === 0xFC) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = ($in & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;
                } else {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }
            } elseif ((0xC0 & $in) === 0x80) {
                // Legal continuation octet: merge its 6 payload bits into the code point.
                $mUcs4 |= ($in & 0x0000003F) << (($mState - 1) * 6);

                // End of the multi-octet sequence: mUcs4 now contains the final code point.
                if (--$mState === 0) {
                    if (
                        // From Unicode 3.1, non-shortest form is illegal.
                        ($mBytes === 2 && $mUcs4 < 0x0080)
                        ||
                        ($mBytes === 3 && $mUcs4 < 0x0800)
                        ||
                        ($mBytes === 4 && $mUcs4 < 0x10000)
                        ||
                        ($mBytes > 4)
                        ||
                        // From Unicode 3.2, surrogate characters are illegal.
                        (($mUcs4 & 0xFFFFF800) === 0xD800)
                        ||
                        // Code points outside the Unicode range are illegal.
                        ($mUcs4 > 0x10FFFF)
                    ) {
                        return false;
                    }

                    // reset for the next character
                    $mUcs4 = 0;
                    $mBytes = 1;
                }
            } else {
                // A continuation octet was expected but something else was found:
                // incomplete multi-octet sequence.
                return false;
            }
        }

        // A sequence that is still open at the end of the input was cut off.
        return $mState === 0;
    }
3768
3769
    /**
     * Decodes a JSON string (wrapper around the native "json_decode()").
     *
     * The input is passed through "UTF8::filter()" before it is decoded.
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>The <i>json</i> string being decoded.</p>
     * @param bool   $assoc   [optional] <p>When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.</p>
     * @param int    $depth   [optional] <p>User specified recursion depth.</p>
     * @param int    $options [optional] <p>Bitmask of JSON decode options, handed to the native function as-is.</p>
     *
     * @throws \RuntimeException if "ext-json" is not available
     *
     * @return mixed
     *               The value encoded in <i>json</i> in appropriate PHP type,
     *               exactly as returned by the native "json_decode()"
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        $filteredJson = self::filter($json);

        $jsonIsMissing = self::$SUPPORT['json'] === false;
        if ($jsonIsMissing) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $decoded = \json_decode($filteredJson, $assoc, $depth, $options);

        return $decoded;
    }
3819
3820
    /**
     * Returns the JSON representation of a value (wrapper around the native "json_encode()").
     *
     * The value is passed through "UTF8::filter()" before it is encoded.
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>The <i>value</i> being encoded. Can be any type except a resource.</p>
     * @param int   $options [optional] <p>Bitmask of JSON_* encode constants, handed to the native function as-is.</p>
     * @param int   $depth   [optional] <p>Set the maximum depth. Must be greater than zero.</p>
     *
     * @throws \RuntimeException if "ext-json" is not available
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        $filteredValue = self::filter($value);

        $jsonIsMissing = self::$SUPPORT['json'] === false;
        if ($jsonIsMissing) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $encoded = \json_encode($filteredValue, $options, $depth);

        return $encoded;
    }
3869
3870
    /**
     * Tells whether the JSON functions can be used, by probing for "json_decode()".
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function json_loaded(): bool
    {
        $hasJsonDecode = \function_exists('json_decode');

        return $hasJsonDecode;
    }
3880
3881
    /**
     * Lowercases the first character of the string and leaves the rest untouched.
     *
     * @param string      $str                   <p>The input string</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // any other charset: normalize the name and use the encoding-aware helpers
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $tail = (string) self::substr($str, 1, null, $encoding);
            $head = self::strtolower(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $tryToKeepStringLength
            );

            return $head . $tail;
        }

        // UTF-8: split into "first char" + "rest"
        $tail = (string) \mb_substr($str, 1);
        $head = (string) \mb_substr($str, 0, 1);

        // the plain "mb_" function is enough as long as no special handling was requested
        if ($lang === null && $tryToKeepStringLength === false) {
            return \mb_strtolower($head) . $tail;
        }

        return self::strtolower($head, $encoding, false, $lang, $tryToKeepStringLength) . $tail;
    }
3937
3938
    /**
     * Alias of "UTF8::lcfirst()": all arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     *
     * @see UTF8::lcfirst()
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $result = self::lcfirst(
            $str,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );

        return $result;
    }
3960
3961
    /**
     * Lowercase the first character of every word in the string.
     *
     * The string is split via "UTF8::str_to_words()", every non-excluded word is
     * passed to "UTF8::lcfirst()" and the parts are glued together again.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string[]    $exceptions            [optional] <p>Words that are left untouched (exact match).</p>
     * @param string      $charlist              [optional] <p>Additional chars that belong to words and do not start
     *                                           a new word.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // Compare against '' explicitly: a loose "!$str" would also swallow the input "0".
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        foreach ($words as &$word) {
            // Skip empty parts ("0" is skipped as well, lowercasing would not change it anyway).
            if (!$word) {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $word = self::lcfirst($word, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
            }
        }
        // break the reference, so that later writes to $word cannot touch the last element
        unset($word);

        return \implode('', $words);
    }
4007
4008
    /**
     * Alias of "UTF8::lcfirst()": all arguments are forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     *
     * @see UTF8::lcfirst()
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $result = self::lcfirst(
            $str,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );

        return $result;
    }
4030
4031
    /**
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped; <strong>null</strong> or an empty string
     *                           strips whitespace. A list like "0" is honoured as well.</p>
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Explicit comparison instead of a truthiness check: "0" is a valid char list.
        if ($chars !== null && $chars !== '') {
            // escape the chars, so that they are safe inside the character class
            $pattern = '^[' . \preg_quote($chars, '/') . ']+';
        } else {
            $pattern = '^[\s]+';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // no mbstring: fall back to the regex helper of this class, with "/" as delimiter
        return self::regex_replace($str, $pattern, '', '', '/');
    }
4059
4060
    /**
     * Finds the character with the highest code point in the given data.
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings
     *                                  (an array is concatenated first).</p>
     *
     * @return string|null the character with the highest code point, or null when no code points were found
     */
    public static function max($arg)
    {
        $haystack = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($haystack, false);
        if (\count($codepoints) > 0) {
            return self::chr(\max($codepoints));
        }

        return null;
    }
4082
4083
    /**
     * Determines the largest number of bytes used by a single
     * character of the given string.
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @return int the largest entry of "UTF8::chr_size_list()", or 0 when that list is empty
     */
    public static function max_chr_width(string $str): int
    {
        $sizes = self::chr_size_list($str);

        if (\count($sizes) === 0) {
            return 0;
        }

        return (int) \max($sizes);
    }
4100
4101
    /**
     * Tells whether the "mbstring" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function mbstring_loaded(): bool
    {
        $isLoaded = \extension_loaded('mbstring');

        return $isLoaded;
    }
4111
4112
    /**
     * Finds the character with the lowest code point in the given data.
     *
     * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings
     *                   (an array is concatenated first).</strong>
     *
     * @return string|null the character with the lowest code point, or null when no code points were found
     */
    public static function min($arg)
    {
        $haystack = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($haystack, false);
        if (\count($codepoints) > 0) {
            return self::chr(\min($codepoints));
        }

        return null;
    }
4134
4135
    /**
     * Alias of "UTF8::normalize_encoding()": both arguments are forwarded unchanged.
     *
     * @param mixed $encoding
     * @param mixed $fallback
     *
     * @return mixed
     *
     * @see UTF8::normalize_encoding()
     * @deprecated <p>use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4150
4151
    /**
     * Normalize the encoding-"name" input.
     *
     * Lookup order: well-known names, the per-request cache, the list of known
     * encodings and finally the table of aliases (matched upper-cased, without
     * punctuation). A name that is not known at all is returned upper-cased.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        // init
        $encoding = (string) $encoding;

        // "", "0" and "1" are treated as "no encoding given"
        // ("0" / "1" only as a fallback, for non "strict_types" usage ...)
        if ($encoding === '' || $encoding === '0' || $encoding === '1') {
            return $fallback;
        }

        // frequently used names, answered without touching the cache
        $wellKnown = [
            'UTF-8'         => 'UTF-8',
            'UTF8'          => 'UTF-8',
            '8BIT'          => 'CP850',
            'BINARY'        => 'CP850',
            'HTML'          => 'HTML-ENTITIES',
            'HTML-ENTITIES' => 'HTML-ENTITIES',
        ];
        if (isset($wellKnown[$encoding])) {
            return $wellKnown[$encoding];
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        // lazy-load the list of known encodings
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // already a known name: remember and return it unchanged
        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        $requestedName = $encoding;
        $encoding = \strtoupper($encoding);
        // alias lookup key: upper-cased name without punctuation, e.g. "iso-8859-1" -> "ISO88591"
        $aliasKey = (string) \preg_replace('/[^a-zA-Z0-9\s]/u', '', $encoding);

        $aliases = [
            // Western European
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1',
            // Central European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2',
            // Southern European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3',
            // Northern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4',
            // Cyrillic
            'ISO88595'    => 'ISO-8859-5',
            // Arabic
            'ISO88596'    => 'ISO-8859-6',
            // Greek
            'ISO88597'    => 'ISO-8859-7',
            // Hebrew
            'ISO88598'    => 'ISO-8859-8',
            // Turkish
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9',
            // Thai
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11',
            // Nordic
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10',
            // Baltic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13',
            // Celtic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14',
            // Western European (with some extra chars e.g. €)
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15',
            // Southeast European
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16',
            // Windows code pages
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            // Unicode
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            // binary
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // known alias -> canonical name, otherwise keep the upper-cased input
        if (isset($aliases[$aliasKey])) {
            $encoding = $aliases[$aliasKey];
        }

        // cache under the name the caller used
        $STATIC_NORMALIZE_ENCODING_CACHE[$requestedName] = $encoding;

        return $encoding;
    }
4294
4295
    /**
     * Convert Windows ("\r\n") and old Mac ("\r") line endings to the unix-like "\n".
     *
     * @param string $str
     *
     * @return string
     */
    public static function normalize_line_ending(string $str): string
    {
        // "strtr" prefers the longest key, so "\r\n" becomes a single "\n"
        $lineEndings = [
            "\r\n" => "\n",
            "\r"   => "\n",
        ];

        return \strtr($str, $lineEndings);
    }
4306
4307
    /**
     * Replace "smart" punctuation, as produced by e.g. MS Word, with plain ASCII:
     * typographic quotes, dashes and the ellipsis.
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @return string
     */
    public static function normalize_msword(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // UTF-8 byte sequence => ASCII replacement
        $replacements = [
            "\xc2\xab"     => '"', // « (U+00AB)
            "\xc2\xbb"     => '"', // » (U+00BB)
            "\xe2\x80\x98" => "'", // ‘ (U+2018)
            "\xe2\x80\x99" => "'", // ’ (U+2019)
            "\xe2\x80\x9a" => "'", // ‚ (U+201A)
            "\xe2\x80\x9b" => "'", // ‛ (U+201B)
            "\xe2\x80\x9c" => '"', // “ (U+201C)
            "\xe2\x80\x9d" => '"', // ” (U+201D)
            "\xe2\x80\x9e" => '"', // „ (U+201E)
            "\xe2\x80\x9f" => '"', // ‟ (U+201F)
            "\xe2\x80\xb9" => "'", // ‹ (U+2039)
            "\xe2\x80\xba" => "'", // › (U+203A)
            "\xe2\x80\x93" => '-', // – (U+2013)
            "\xe2\x80\x94" => '-', // — (U+2014)
            "\xe2\x80\xa6" => '...', // … (U+2026)
        ];

        return \strtr($str, $replacements);
    }
4358
4359
    /**
     * Normalize the whitespace: every char from the whitespace table is replaced by
     * a plain space and (by default) the bidirectional control chars are removed.
     *
     * @param string $str                     <p>The string to be normalized.</p>
     * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                        bidirectional text chars.</p>
     *
     * @return string
     */
    public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
    {
        // replacement lists, built once per request:
        // one whitespace list per $keepNonBreakingSpace value (index 0 / 1)
        static $whitespaceLists = [];
        static $bidiControls = null;

        if ($str === '') {
            return '';
        }

        $listIndex = (int) $keepNonBreakingSpace;

        if (!isset($whitespaceLists[$listIndex])) {
            $table = self::$WHITESPACE_TABLE;

            if ($keepNonBreakingSpace === true) {
                unset($table['NO-BREAK SPACE']);
            }

            $whitespaceLists[$listIndex] = \array_values($table);
        }

        // the control chars are removed before the whitespace is replaced
        if ($keepBidiUnicodeControls === false) {
            if ($bidiControls === null) {
                $bidiControls = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
            }

            $str = \str_replace($bidiControls, '', $str);
        }

        return \str_replace($whitespaceLists[$listIndex], ' ', $str);
    }
4400
4401
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * Lookup order: per-request memo, the pre-built "ord" data table, "IntlChar" (if available)
     * and finally a manual decode of the first (up to four) bytes.
     *
     * @param string $chr      <p>The character of which to calculate code point.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             Unicode code point of the given character,<br>
     *             0 when there are no bytes to decode
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        static $codePoints = [];

        $character = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $memoKey = $character . $encoding;
        if (isset($codePoints[$memoKey]) === true) {
            return $codePoints[$memoKey];
        }

        // the encoding may still be something else than UTF-8 after the normalization
        if ($encoding !== 'UTF-8') {
            $character = self::encode($encoding, $character);
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$character])) {
            $codePoints[$memoKey] = self::$ORD[$character];

            return $codePoints[$memoKey];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $viaIntl = \IntlChar::ord($character);
            if ($viaIntl) {
                $codePoints[$memoKey] = $viaIntl;

                return $codePoints[$memoKey];
            }
        }

        //
        // fallback via vanilla php
        //

        // 1-based list of the first (max. four) byte values
        $bytes = \unpack('C*', (string) \substr($character, 0, 4));
        $lead = $bytes ? $bytes[1] : 0;

        if ($lead >= 0xF0 && isset($bytes[4])) {
            // four byte sequence
            $decoded = (int) ((($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        } elseif ($lead >= 0xE0 && isset($bytes[3])) {
            // three byte sequence
            $decoded = (int) ((($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        } elseif ($lead >= 0xC0 && isset($bytes[2])) {
            // two byte sequence
            $decoded = (int) ((($lead - 0xC0) << 6) + $bytes[2] - 0x80);
        } else {
            // single byte (or nothing at all)
            $decoded = $lead;
        }

        $codePoints[$memoKey] = $decoded;

        return $codePoints[$memoKey];
    }
4479
4480
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike the native "parse_str()", this method never (re-)places variables in the current scope;
     *          the parsed values are always written into the second parameter.
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str       <p>The input string.</p>
     * @param array  $result    <p>The result will be returned into this reference parameter.</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return bool
     *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result
     */
    public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
    {
        $input = $cleanUtf8 === true ? self::clean($str) : $str;

        if (self::$SUPPORT['mbstring'] !== true) {
            /** @noinspection PhpVoidFunctionResultUsedInspection */
            \parse_str($input, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($input, $result);
        if ($parsed === false) {
            return false;
        }

        return $result !== [];
    }
4512
4513
    /**
     * Checks if the "u" pattern modifier, which enables Unicode support in PCRE, is available.
     *
     * @return bool
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     */
    public static function pcre_utf8_support(): bool
    {
        // an empty UTF-8 pattern on an empty subject matches only if the modifier is understood
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $probe = @\preg_match('//u', '');

        return (bool) $probe;
    }
4525
4526
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * Each boundary is resolved to a code point in this order: decimal digits, hexadecimal digits,
     * otherwise the code point of the given character.
     *
     * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     *
     * @throws \RuntimeException if "ext-ctype" is not available
     *
     * @return string[]
     *                  Empty if a boundary is falsy or resolves to the code point 0
     */
    public static function range($var1, $var2): array
    {
        if (!$var1 || !$var2) {
            return [];
        }

        if (self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // Resolve one boundary into a code point. Both ctype checks get the string form,
        // because passing non-string values to ctype functions is deprecated since PHP 8.1.
        $toCodePoint = static function ($boundary): int {
            $asString = (string) $boundary;

            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_digit($asString)) {
                return (int) $boundary;
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_xdigit($asString)) {
                return (int) self::hex_to_int($boundary);
            }

            return self::ord($boundary);
        };

        $start = $toCodePoint($var1);
        if (!$start) {
            return [];
        }

        $end = $toCodePoint($var2);
        if (!$end) {
            return [];
        }

        return \array_map(
            static function (int $i): string {
                return (string) self::chr($i);
            },
            \range($start, $end)
        );
    }
4577
4578
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @return string
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // without any of these markers there is nothing to url- or entity-decode
        $hasEncodedParts = false;
        foreach (['&', '%', '+', '\u'] as $marker) {
            if (\strpos($str, $marker) !== false) {
                $hasEncodedParts = true;

                break;
            }
        }

        if ($hasEncodedParts === false) {
            return self::fix_simple_utf8($str);
        }

        $decoded = self::urldecode_unicode_helper($str);

        // repeat the decoding until the string is stable (or only once, if requested)
        do {
            $previous = $decoded;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $withoutEntities = self::html_entity_decode(
                self::to_utf8($decoded),
                \ENT_QUOTES | \ENT_HTML5
            );

            $decoded = self::fix_simple_utf8(\rawurldecode($withoutEntities));
        } while ($multi_decode === true && $previous !== $decoded);

        return $decoded;
    }
4635
4636
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The pattern is always executed with the "u" (UTF-8) modifier.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
     * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // "msr" is mapped onto the modifiers "ms"
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback to the default delimiter
        $boundary = $delimiter ?: '/';

        $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
4669
4670
    /**
     * Deprecated camelCase alias that forwards to "UTF8::remove_bom()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *
     * @see UTF8::remove_bom()
     * @deprecated <p>use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $withoutBom = self::remove_bom($str);

        return $withoutBom;
    }
4684
4685
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of the internal BOM table is tested once against the beginning of the
     * (possibly already shortened) string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string without UTF-BOM
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remaining = $str;
        foreach (self::$BOM as $marker => $markerByteLength) {
            if (\strpos($remaining, $marker, 0) !== 0) {
                continue;
            }

            $stripped = \substr($remaining, $markerByteLength, \strlen($remaining));
            if ($stripped === false) {
                // nothing left behind the BOM
                return '';
            }

            $remaining = (string) $stripped;
        }

        return $remaining;
    }
4714
4715
    /**
     * Collapses directly repeated occurrences of a string inside another string into a single one.
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String(s) to search for in the base string.</p>
     *
     * @return string the result string with removed duplicates
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        $needles = \is_string($what) === true ? [$what] : $what;

        // anything that is neither a string nor an array leaves the input untouched
        if (\is_array($needles) !== true) {
            return $str;
        }

        $result = $str;

        /** @noinspection ForeachSourceInspection */
        foreach ($needles as $needle) {
            $pattern = '/(' . \preg_quote($needle, '/') . ')+/u';
            $result = (string) \preg_replace($pattern, $needle, $result);
        }

        return $result;
    }
4738
4739
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $allowableTags [optional] <p>Tags which should not be stripped, e.g. "&lt;b&gt;&lt;i&gt;".
     *                              Default: '' (strip every tag)</p>
     *
     * @return string
     */
    public static function remove_html(string $str, string $allowableTags = ''): string
    {
        $withoutTags = \strip_tags($str, $allowableTags);

        return $withoutTags;
    }
4753
4754
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * A "\r\n" pair counts as ONE break, so it is replaced by a single $replacement.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is an empty string.</p>
     *
     * @return string
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // "#" is the delimiter here. The former pattern started with a stray "/", so its first
        // alternative only matched a literal slash followed by "\r\n": plain "\r\n" pairs were
        // replaced twice and a "/" in front of a line break was swallowed.
        //
        // modifiers: i = case-insensitive, s = dot matches newlines, U = un-greedy
        return (string) \preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
    }
4766
4767
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * The replacement is repeated until nothing matches anymore, so fragments which only
     * become "invisible characters" after a previous removal are caught as well.
     *
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * @param string $str         <p>The input string.</p>
     * @param bool   $url_encoded [optional] <p>Also remove the url-encoded form (e.g. "%00") of these chars.</p>
     * @param string $replacement [optional] <p>The string to insert for every removed match.</p>
     *
     * @return string
     */
    public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
    {
        // every control character except newline (dec 10),
        // carriage return (dec 13) and horizontal tab (dec 09)
        $patterns = $url_encoded
            ? [
                '/%0[0-8bcefBCEF]/', // url encoded 00-08, 11, 12, 14, 15
                '/%1[0-9a-fA-F]/', // url encoded 16-31
            ]
            : [];

        $patterns[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

        $cleaned = $str;
        do {
            $cleaned = (string) \preg_replace($patterns, $replacement, $cleaned, -1, $hits);
        } while ($hits !== 0);

        return $cleaned;
    }
4800
4801
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string without the prefix $substring
     */
    public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // compare against '' explicitly: a plain truthiness check would ignore the prefix "0"
        if ($substring === '' || \strpos($str, $substring) !== 0) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
4832
4833
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string having a $str without the suffix $substring
     */
    public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // compare against '' explicitly: a plain truthiness check would ignore the suffix "0"
        if ($substring === '' || \substr($str, -\strlen($substring)) !== $substring) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
4865
4866
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $search        <p>The needle to search for.</p>
     * @param string $replacement   <p>The string to replace with.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $caseSensitive = true
    ): string {
        if (!$caseSensitive) {
            // case-insensitive variant is delegated to the multibyte-aware helper
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
4888
4889
    /**
     * Replaces all occurrences of every element of $search in $str by $replacement.
     *
     * @param string       $str           <p>The input string.</p>
     * @param array        $search        <p>The elements to search for.</p>
     * @param array|string $replacement   <p>The string (or the list of strings) to replace with.</p>
     * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $caseSensitive = true
    ): string {
        if (!$caseSensitive) {
            // case-insensitive variant is delegated to the multibyte-aware helper
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
4911
4912
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * @param string $str                <p>The input string</p>
     * @param string $replacementChar    <p>The replacement character (or string). Default: '' (remove)</p>
     * @param bool   $processInvalidUtf8 <p>Also convert invalid UTF-8 byte sequences</p>
     *
     * @return string
     */
    public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
    {
        if ($str === '') {
            return '';
        }

        if ($processInvalidUtf8 === true) {
            // "mb_substitute_character()" only accepts a code point or "none" / "long" / "entity".
            // Passing the raw replacement string is rejected (warning on PHP 7, ValueError on PHP 8).
            // So invalid sequences are turned into U+FFFD first; the final "str_replace()" below
            // then maps U+FFFD onto the requested replacement, which may also be longer than one char.
            $substitute = $replacementChar === '' ? 'none' : 0xFFFD;

            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            // temporarily switch the global substitute character and restore it afterwards
            $save = \mb_substitute_character();
            \mb_substitute_character($substitute);
            // the polyfill maybe return false, so cast to string
            $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            \mb_substitute_character($save);
        }

        return \str_replace(
            [
                "\xEF\xBF\xBD",
                '�',
            ],
            [
                $replacementChar,
                $replacementChar,
            ],
            $str
        );
    }
4958
4959
    /**
     * Strip whitespace or other characters from end of a UTF-8 string.
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped. If null or empty, whitespace is stripped.</p>
     *
     * @return string the string with unwanted characters stripped from the right
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // explicit comparison instead of a truthiness check, so that "0" is a usable character list;
        // plain concatenation instead of the "${var}" interpolation, which is deprecated since PHP 8.2
        if ($chars !== null && $chars !== '') {
            $pattern = '[' . \preg_quote($chars, '/') . ']+$';
        } else {
            $pattern = '[\s]+$';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
4987
4988
    /**
     * WARNING: Prints the detected native UTF-8 support (libs) directly to the output, e.g. for debugging.
     *
     * Every entry of the internal support table is written as "key - value" inside of a "pre"-tag.
     *
     * @psalm-suppress MissingReturnType
     */
    public static function showSupport()
    {
        echo '<pre>';

        foreach (self::$SUPPORT as $feature => $state) {
            echo $feature . ' - ' . \print_r($state, true) . "\n<br>";
        }

        echo '</pre>';
    }
5002
5003
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the HTML numbered entity
     */
    public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($char === '') {
            return '';
        }

        if ($keepAsciiChars === true) {
            // ASCII is returned untouched, if requested
            if (self::is_ascii($char) === true) {
                return $char;
            }
        }

        $codePoint = self::ord($char, $encoding);

        return '&#' . $codePoint . ';';
    }
5028
5029
    /**
     * Replaces each run of $tabLength spaces with one tab character.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength [optional] <p>Number of spaces which represent one tab. Default: 4</p>
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tabLength = 4): string
    {
        // the run of spaces which stands for one tab
        $spaces = \str_repeat(' ', $tabLength);

        return \str_replace($spaces, "\t", $str);
    }
5047
5048
    /**
     * Alias that forwards all arguments to "UTF8::str_split()".
     *
     * @param string|string[] $str
     * @param int             $length
     * @param bool            $cleanUtf8
     *
     * @return string[]
     *
     * @see UTF8::str_split()
     */
    public static function split(
        $str,
        int $length = 1,
        bool $cleanUtf8 = false
    ): array {
        $chunks = self::str_split($str, $length, $cleanUtf8);

        return $chunks;
    }
5066
5067
    /**
     * Alias that forwards both arguments to "UTF8::str_starts_with()".
     *
     * @param string $haystack
     * @param string $needle
     *
     * @return bool
     *
     * @see UTF8::str_starts_with()
     */
    public static function str_begins(string $haystack, string $needle): bool
    {
        $startsWith = self::str_starts_with($haystack, $needle);

        return $startsWith;
    }
5081
5082
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $camelized = self::lcfirst(
            \trim($str),
            $encoding,
            false,
            $lang,
            $tryToKeepStringLength
        );

        // drop leading dashes / underscores
        $camelized = (string) \preg_replace('/^[-_]+/', '', $camelized);

        // the plain "mb_strtoupper()" is used, unless language or length handling was requested
        $useMbFunction = $lang === null && $tryToKeepStringLength === false;

        // shared upper-casing step for both replacement passes below
        $toUpper = static function (string $text, bool $clean) use ($useMbFunction, $encoding, $lang, $tryToKeepStringLength): string {
            if ($useMbFunction !== true) {
                return self::strtoupper($text, $encoding, $clean, $lang, $tryToKeepStringLength);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($text);
            }

            return \mb_strtoupper($text, $encoding);
        };

        // 1. pass: remove the separators and upper-case the char directly behind them
        $camelized = (string) \preg_replace_callback(
            '/[-_\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($toUpper): string {
                if (!isset($match[1])) {
                    return '';
                }

                return $toUpper($match[1], false);
            },
            $camelized
        );

        // 2. pass: upper-case the char directly behind a run of digits
        return (string) \preg_replace_callback(
            '/[\d]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($toUpper, $cleanUtf8): string {
                return $toUpper($match[0], $cleanUtf8);
            },
            $camelized
        );
    }
5167
5168
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * The whitespace is collapsed first, then the name-helper is applied for the
     * delimiter " " and afterwards for the delimiter "-".
     *
     * @param string $str
     *
     * @return string string with $str capitalized
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);
        $capitalizedWords = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($capitalizedWords, '-');
    }
5186
5187
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $caseSensitive to false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>Substring to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool whether or not $haystack contains $needle
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $caseSensitive = true
    ): bool {
        $position = $caseSensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        return $position !== false;
    }
5209
5210
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $caseSensitive to false.
     *
     * An empty haystack, an empty list of needles or an empty needle always results in false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool whether or not $haystack contains every one of the $needles
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $caseSensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        // Every needle has to be checked: the former loop returned the result of the
        // FIRST needle right away and never looked at the remaining ones.
        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // explicit comparison, so that the needle "0" is searched for like any other string
            if ($needle === '') {
                return false;
            }

            $position = $caseSensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position === false) {
                return false;
            }
        }

        return true;
    }
5245
5246
    /**
     * Returns true if the string contains at least one of the $needles, false
     * otherwise. By default the comparison is case-sensitive, but can be made
     * insensitive by setting $caseSensitive to false.
     *
     * Falsy needles (e.g. '' or null) are skipped.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool
     *              Whether or not $haystack contains one of the $needles
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $caseSensitive = true
    ): bool {
        if ($needles === [] || $haystack === '') {
            return false;
        }

        foreach ($needles as $candidate) {
            if (!$candidate) {
                continue;
            }

            $position = $caseSensitive
                ? \strpos($haystack, $candidate)
                : \mb_stripos($haystack, $candidate);

            // the first hit settles the answer
            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
5288
5289
    /**
     * Dash-separated variant of "UTF8::str_delimit()": the call is forwarded
     * with '-' as the delimiter.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *
     * @see UTF8::str_delimit()
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $dash = '-';

        return self::str_delimit($str, $dash, $encoding);
    }
5303
5304
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     *
     * Works in three steps: a dash is put in front of every uppercase letter
     * that does not sit at a word boundary, the string is lower-cased, and
     * finally each run of dashes, underscores and whitespace is replaced by
     * $delimiter. The delimiter itself is inserted after the lower-casing.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $delimiter             <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Forwarded to "UTF8::strtolower()" when that
     *                                           method is used for the lower-casing.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                           tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                           ß</p>
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // pick the regex engine once: "mb_ereg_*" when mbstring is flagged as supported, PCRE otherwise
        $viaMbRegex = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // step 1: mark inner uppercase letters with a leading dash
        if ($viaMbRegex) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $marked = (string) \mb_ereg_replace('\B(\p{Lu})', '-\1', $trimmed);
        } else {
            $marked = (string) \preg_replace('/\B(\p{Lu})/u', '-\1', $trimmed);
        }

        // step 2: lower-case; the plain "mb_strtolower()" is only used when no special handling was requested
        if ($lang === null && $tryToKeepStringLength === false && $encoding === 'UTF-8') {
            $lowered = \mb_strtolower($marked);
        } else {
            $lowered = self::strtolower($marked, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
        }

        // step 3: collapse every run of "-", "_" and whitespace into the delimiter
        if ($viaMbRegex) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[\-_\s]+', $delimiter, $lowered);
        }

        return (string) \preg_replace('/[\-_\s]+/u', $delimiter, $lowered);
    }
5355
5356
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * The checks run in a fixed order and the first match wins:
     * binary (UTF-16 / UTF-32), ASCII, UTF-8, "mb_detect_encoding()" and
     * finally an "iconv()" round-trip over the known encodings.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     */
    public static function str_detect_encoding($str)
    {
        $input = (string) $str;

        //
        // 1.) binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        if (self::is_binary($input, true) === true) {
            // result 1 is reported as little-endian, result 2 as big-endian
            $utf16 = self::is_utf16($input, false);
            if ($utf16 === 1) {
                return 'UTF-16LE';
            }
            if ($utf16 === 2) {
                return 'UTF-16BE';
            }

            $utf32 = self::is_utf32($input, false);
            if ($utf32 === 1) {
                return 'UTF-32LE';
            }
            if ($utf32 === 2) {
                return 'UTF-32BE';
            }

            // binary, but neither "UTF-16" nor "UTF-32"
            return false;
        }

        //
        // 2.) plain ASCII
        //

        if (self::is_ascii($input) === true) {
            return 'ASCII';
        }

        //
        // 3.) UTF-8
        //

        if (self::is_utf8($input) === true) {
            return 'UTF-8';
        }

        //
        // 4.) "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            $candidates = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($input, $candidates, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) "iconv()": an encoding matches when converting the input to itself leaves it unchanged
        //

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $roundTrip = (string) @\iconv($candidate, $candidate . '//IGNORE', $input);
            if ($roundTrip === $input) {
                return $candidate;
            }
        }

        return false;
    }
5477
5478
    /**
     * Alias: forwards both arguments unchanged to "UTF8::str_ends_with()".
     *
     * @param string $haystack
     * @param string $needle
     *
     * @return bool
     *
     * @see UTF8::str_ends_with()
     */
    public static function str_ends(string $haystack, string $needle): bool
    {
        $endsWithNeedle = self::str_ends_with($haystack, $needle);

        return $endsWithNeedle;
    }
5492
5493
    /**
     * Check if the string ends with the given substring (case-sensitive).
     *
     * An empty $needle always matches; an empty $haystack only matches an
     * empty $needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        $needleBytes = \strlen($needle);
        if ($needleBytes === 0) {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // compare the trailing bytes of the haystack with the needle
        $tail = \substr($haystack, -$needleBytes);

        return $tail === $needle;
    }
5513
5514
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with one of the $substrings
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        // an empty list simply skips the loop and falls through to "false"
        foreach ($substrings as $suffix) {
            $tail = \substr($str, -\strlen($suffix));
            if ($tail === $suffix) {
                return true;
            }
        }

        return false;
    }
5538
5539
    /**
     * Ensures that the string begins with $substring. If it doesn't, it's
     * prepended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        // the empty-string check comes first, so "\strpos()" never sees an empty needle
        $alreadyPrefixed = $substring !== '' && \strpos($str, $substring) === 0;

        return $alreadyPrefixed ? $str : $substring . $str;
    }
5560
5561
    /**
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        // the suffix comparison only runs when both strings are non-empty
        $alreadySuffixed = $str !== ''
            && $substring !== ''
            && \substr($str, -\strlen($substring)) === $substring;

        if ($alreadySuffixed) {
            return $str;
        }

        return $str . $substring;
    }
5583
5584
    /**
     * Removes every '_id', turns the remaining underscores into spaces, trims
     * the result and passes it through "UTF8::ucfirst()".
     *
     * @param string $str
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        // order matters: '_id' has to be removed before the bare '_' is replaced
        $replacements = [
            '_id' => '',
            '_'   => ' ',
        ];

        $cleaned = \str_replace(
            \array_keys($replacements),
            \array_values($replacements),
            $str
        );

        return self::ucfirst(\trim($cleaned));
    }
5608
5609
    /**
     * Alias: forwards both arguments unchanged to "UTF8::str_istarts_with()".
     *
     * @param string $haystack
     * @param string $needle
     *
     * @return bool
     *
     * @see UTF8::str_istarts_with()
     */
    public static function str_ibegins(string $haystack, string $needle): bool
    {
        $startsWithNeedle = self::str_istarts_with($haystack, $needle);

        return $startsWithNeedle;
    }
5623
5624
    /**
     * Alias: forwards both arguments unchanged to "UTF8::str_iends_with()".
     *
     * @param string $haystack
     * @param string $needle
     *
     * @return bool
     *
     * @see UTF8::str_iends_with()
     */
    public static function str_iends(string $haystack, string $needle): bool
    {
        $endsWithNeedle = self::str_iends_with($haystack, $needle);

        return $endsWithNeedle;
    }
5638
5639
    /**
     * Check if the string ends with the given substring, case insensitive.
     *
     * An empty $needle always matches; an empty $haystack only matches an
     * empty $needle. The comparison itself is done by "UTF8::strcasecmp()".
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        $needleBytes = \strlen($needle);
        if ($needleBytes === 0) {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // take as many trailing bytes as the needle has and compare them
        $tail = \substr($haystack, -$needleBytes);

        return self::strcasecmp($tail, $needle) === 0;
    }
5659
5660
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * Every entry is checked via "UTF8::str_iends_with()".
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with one of the $substrings
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // an empty list simply skips the loop and falls through to "false"
        foreach ($substrings as $suffix) {
            if (self::str_iends_with($str, $suffix)) {
                return true;
            }
        }

        return false;
    }
5684
5685
    /**
     * Case-insensitive search for the first occurrence of $needle in the
     * string; thin wrapper that forwards all arguments to "UTF8::stripos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     *
     * @see UTF8::stripos()
     */
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $index = self::stripos($str, $needle, $offset, $encoding);

        return $index;
    }
5711
5712
    /**
     * Case-insensitive search for the last occurrence of $needle in the
     * string; thin wrapper that forwards all arguments to "UTF8::strripos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     *
     * @see UTF8::strripos()
     */
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $index = self::strripos($str, $needle, $offset, $encoding);

        return $index;
    }
5739
5740
    /**
     * Search for the first occurrence of $needle in the string; thin wrapper
     * that forwards all arguments to "UTF8::strpos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     *
     * @see UTF8::strpos()
     */
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $index = self::strpos($str, $needle, $offset, $encoding);

        return $index;
    }
5766
5767
    /**
     * Search for the last occurrence of $needle in the string; thin wrapper
     * that forwards all arguments to "UTF8::strrpos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     *
     * @see UTF8::strrpos()
     */
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $index = self::strrpos($str, $needle, $offset, $encoding);

        return $index;
    }
5794
5795
    /**
     * Inserts $substring into the string at the $index provided.
     *
     * If $index is greater than the length of $str, the string is returned
     * unchanged.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        // the default encoding goes straight to the native "mb_" functions
        if ($encoding === 'UTF-8') {
            $length = (int) \mb_strlen($str);
            if ($index > $length) {
                return $str;
            }

            $head = (string) \mb_substr($str, 0, $index);
            $tail = (string) \mb_substr($str, $index, $length);

            return $head . $substring . $tail;
        }

        // every other encoding is normalized first and handled by the class' own helpers
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $length = (int) self::strlen($str, $encoding);
        if ($index > $length) {
            return $str;
        }

        $head = (string) self::substr($str, 0, $index, $encoding);
        $tail = (string) self::substr($str, $index, $length, $encoding);

        return $head . $substring . $tail;
    }
5834
5835
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * Each search term is quoted and turned into a case-insensitive unicode
     * pattern, the actual work is done by "\preg_replace()". An empty search
     * term is mapped to a pattern that never matches.
     *
     * @see http://php.net/manual/en/function.str-ireplace.php
     *
     * @param mixed $search  <p>
     *                       Every replacement with search array is
     *                       performed on the result of previous replacement.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value(s), handed over to
     *                       "\preg_replace()" as given.
     *                       </p>
     * @param mixed $subject <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] <p>
     *                       The number of matched and replaced needles will
     *                       be returned in count which is passed by
     *                       reference.
     *                       </p>
     *
     * @return mixed a string or an array of replacements
     */
    public static function str_ireplace($search, $replace, $subject, &$count = null)
    {
        $patterns = [];
        foreach ((array) $search as $key => $term) {
            $term = (string) $term;

            $patterns[$key] = $term === ''
                ? '/^(?<=.)$/'
                : '/' . \preg_quote($term, '/') . '/ui';
        }

        // "\preg_replace()" writes the number of replacements straight into the reference parameter
        $result = \preg_replace($patterns, $replace, $subject, -1, $count);

        return $result;
    }
5879
5880
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * The match is looked up with the byte-oriented "\stripos()". An empty
     * $search results in $str followed by $replacement.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        // for an empty $str this yields just the replacement
        if ($search === '') {
            return $str . $replacement;
        }

        // nothing to replace in an empty string
        if ($str === '') {
            return '';
        }

        if (\stripos($str, $search) !== 0) {
            return $str;
        }

        $remainder = \substr($str, \strlen($search));

        return $replacement . $remainder;
    }
5911
5912
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * The match is looked up with the byte-oriented "\stripos()". An empty
     * $search results in $str followed by $replacement. If $search is longer
     * than $str, the string is returned unchanged.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        // for an empty $str this yields just the replacement
        if ($search === '') {
            return $str . $replacement;
        }

        // byte position at which $search has to start in order to be the ending of $str
        $offset = \strlen($str) - \strlen($search);

        // A search that is longer than the string can never match. Bail out
        // here, because a negative offset may lie outside of the haystack,
        // which makes "\stripos()" emit a warning (PHP 7) or throw a
        // ValueError (PHP 8).
        if ($offset < 0) {
            return $str;
        }

        if (\stripos($str, $search, $offset) !== false) {
            return \substr($str, 0, $offset) . $replacement;
        }

        return $str;
    }
5943
5944
    /**
     * Check if the string starts with the given substring, case insensitive.
     *
     * An empty $needle always matches; an empty $haystack only matches an
     * empty $needle. The lookup itself is done by "UTF8::stripos()".
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // a prefix match means the needle was found at position 0
        $position = self::stripos($haystack, $needle);

        return $position === 0;
    }
5964
5965
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * An empty $str never matches. Every entry is checked via
     * "UTF8::str_istarts_with()".
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with one of the $substrings
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '') {
            return false;
        }

        // an empty list simply skips the loop and falls through to "false"
        foreach ($substrings as $prefix) {
            if (self::str_istarts_with($str, $prefix)) {
                return true;
            }
        }

        return false;
    }
5993
5994
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * - case-insensitive
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator was not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Forward the encoding, so that the offset is calculated in the same
        // charset that is used for cutting the substring below.
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        // skip the separator itself and take everything up to the end
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6028
6029
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * - case-insensitive
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator was not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Forward the encoding, so that the offset is calculated in the same
        // charset that is used for cutting the substring below.
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        // skip the separator itself and take everything up to the end
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6063
6064
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * - case-insensitive
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator was not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Forward the encoding, so that the offset is calculated in the same
        // charset that is used for cutting the substring below.
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6090
6091
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * - case-insensitive
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator was not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        // the default encoding uses the native "mb_" functions, everything else the class' own helpers
        $isUtf8 = $encoding === 'UTF-8';

        $offset = $isUtf8
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);

        if ($offset === false) {
            return '';
        }

        if ($isUtf8) {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6122
6123
    /**
     * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
     *
     * The lookup itself is delegated to self::stristr(); a failed lookup is
     * reported as an empty string instead of false.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>The matched part, or an empty string when $str or $needle is
     *                empty or nothing was found.</p>
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $match = self::stristr($str, $needle, $beforeNeedle, $encoding);

        return $match === false ? '' : $match;
    }
6159
6160
    /**
     * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
     *
     * The lookup itself is delegated to self::strrichr(); a failed lookup is
     * reported as an empty string instead of false.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>The matched part, or an empty string when $str or $needle is
     *                empty or nothing was found.</p>
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $match = self::strrichr($str, $needle, $beforeNeedle, $encoding);

        return $match === false ? '' : $match;
    }
6191
6192
    /**
     * Returns the last $n characters of the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The trailing characters, or an empty string when $str is empty
     *                or $n is not positive.</p>
     */
    public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        if ($n <= 0 || $str === '') {
            return '';
        }

        // A negative start offset counts from the end of the string.
        $start = -$n;

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, $start, null, $encoding);
        }

        return (string) \mb_substr($str, $start);
    }
6215
6216
    /**
     * Limit the number of characters in a string.
     *
     * Strings that already fit into $length are returned unchanged. Longer
     * strings are cut so that the kept part plus $strAddOn is $length
     * characters long. If $strAddOn alone is at least $length characters
     * long, nothing of $str is kept and only $strAddOn is returned.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The limited string, or an empty string when $str is empty or
     *                $length is not positive.</p>
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $strAddOn = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Clamp at zero: a negative length would make mb_substr() strip
            // characters from the END of the string instead of limiting it.
            $keep = \max(0, $length - (int) \mb_strlen($strAddOn));

            return ((string) \mb_substr($str, 0, $keep)) . $strAddOn;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // Same clamp as above for the non-default encodings.
        $keep = \max(0, $length - (int) self::strlen($strAddOn));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $strAddOn;
    }
6253
6254
    /**
     * Limit the number of characters in a string, but also after the next word.
     *
     * The string is cut at $length characters and the last (possibly partial)
     * space-separated word is dropped before $strAddOn is appended. If the cut
     * contains no space, one character is removed instead.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The limited string, or an empty string when $str is empty or
     *                $length is not positive.</p>
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $strAddOn = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        $isUtf8 = $encoding === 'UTF-8';

        if ($isUtf8) {
            /** @noinspection UnnecessaryCastingInspection */
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // The limit falls right behind a word: drop the space and stop there.
            if (\mb_substr($str, $length - 1, 1) === ' ') {
                return ((string) \mb_substr($str, 0, $length - 1)) . $strAddOn;
            }

            $truncated = \mb_substr($str, 0, $length);
        } else {
            if ((int) self::strlen($str, $encoding) <= $length) {
                return $str;
            }

            if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
                return ((string) self::substr($str, 0, $length - 1, $encoding)) . $strAddOn;
            }

            $truncated = self::substr($str, 0, $length, $encoding);
            if ($truncated === false) {
                return $strAddOn;
            }
        }

        // Remove the last word of the cut, it may be incomplete.
        $words = \explode(' ', $truncated);
        \array_pop($words);
        $wholeWords = \implode(' ', $words);

        if ($wholeWords !== '') {
            return $wholeWords . $strAddOn;
        }

        // The cut was a single word: fall back to removing one character.
        $head = $isUtf8
            ? \mb_substr($truncated, 0, $length - 1)
            : self::substr($truncated, 0, $length - 1, $encoding);

        return ((string) $head) . $strAddOn;
    }
6318
6319
    /**
     * Returns the longest common prefix between the string and $otherStr.
     *
     * Both strings are compared character by character from the start until
     * the first mismatch or until the shorter string is exhausted.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string <p>The shared prefix, possibly empty.</p>
     */
    public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        // The default encoding uses the native mb_* functions directly.
        $isUtf8 = $encoding === 'UTF-8';

        if ($isUtf8) {
            $limit = (int) \min(\mb_strlen($str), \mb_strlen($otherStr));
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $limit = (int) \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));
        }

        $prefix = '';
        $pos = 0;

        while ($pos < $limit) {
            $char = $isUtf8
                ? \mb_substr($str, $pos, 1)
                : self::substr($str, $pos, 1, $encoding);

            if ($char === false) {
                break;
            }

            $otherChar = $isUtf8
                ? \mb_substr($otherStr, $pos, 1)
                : self::substr($otherStr, $pos, 1, $encoding);

            if ($char !== $otherChar) {
                break;
            }

            $prefix .= $char;
            ++$pos;
        }

        return $prefix;
    }
6377
6378
    /**
     * Returns the longest common substring between the string and $otherStr.
     * In the case of ties, it returns that which occurs first.
     *
     * Uses dynamic programming, see
     * http://en.wikipedia.org/wiki/Longest_common_substring_problem
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The longest substring of $str that also occurs in $otherStr,
     *                or an empty string when there is none.</p>
     */
    public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $otherStr === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $strLength = (int) \mb_strlen($str);
            $otherLength = (int) \mb_strlen($otherStr);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $strLength = (int) self::strlen($str, $encoding);
            $otherLength = (int) self::strlen($otherStr, $encoding);
        }

        // Return if either string is empty
        if ($strLength === 0 || $otherLength === 0) {
            return '';
        }

        // Checked again on purpose: the encoding may have been normalized above.
        $isUtf8 = $encoding === 'UTF-8';

        // Extract every character exactly once instead of once per table cell.
        $strChars = [];
        for ($i = 0; $i < $strLength; ++$i) {
            $strChars[] = $isUtf8
                ? \mb_substr($str, $i, 1)
                : self::substr($str, $i, 1, $encoding);
        }

        $otherChars = [];
        for ($j = 0; $j < $otherLength; ++$j) {
            $otherChars[] = $isUtf8
                ? \mb_substr($otherStr, $j, 1)
                : self::substr($otherStr, $j, 1, $encoding);
        }

        // $len: length of the best match so far, $end: its end position in $str.
        $len = 0;
        $end = 0;

        // Each row only depends on the previous one, so two rows are enough.
        $previousRow = \array_fill(0, $otherLength + 1, 0);

        for ($i = 1; $i <= $strLength; ++$i) {
            $strChar = $strChars[$i - 1];
            $currentRow = [0];

            for ($j = 1; $j <= $otherLength; ++$j) {
                if ($strChar === $otherChars[$j - 1]) {
                    // Extend the common run that ended at the previous characters.
                    $run = $previousRow[$j - 1] + 1;

                    // Strictly greater: on ties the first occurrence wins.
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $run = 0;
                }

                $currentRow[$j] = $run;
            }

            $previousRow = $currentRow;
        }

        if ($isUtf8) {
            return (string) \mb_substr($str, $end - $len, $len);
        }

        return (string) self::substr($str, $end - $len, $len, $encoding);
    }
6462
6463
    /**
     * Returns the longest common suffix between the string and $otherStr.
     *
     * Both strings are compared character by character from the end until the
     * first mismatch or until the shorter string is exhausted.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string <p>The shared suffix, possibly empty.</p>
     */
    public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $otherStr === '') {
            return '';
        }

        // The default encoding uses the native mb_* functions directly.
        $isUtf8 = $encoding === 'UTF-8';

        if ($isUtf8) {
            $limit = (int) \min(\mb_strlen($str, $encoding), \mb_strlen($otherStr, $encoding));
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $limit = (int) \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));
        }

        $suffix = '';
        $fromEnd = 1;

        while ($fromEnd <= $limit) {
            // Negative offsets address characters counted from the end.
            $char = $isUtf8
                ? \mb_substr($str, -$fromEnd, 1)
                : self::substr($str, -$fromEnd, 1, $encoding);

            if ($char === false) {
                break;
            }

            $otherChar = $isUtf8
                ? \mb_substr($otherStr, -$fromEnd, 1)
                : self::substr($otherStr, -$fromEnd, 1, $encoding);

            if ($char !== $otherChar) {
                break;
            }

            $suffix = $char . $suffix;
            ++$fromEnd;
        }

        return $suffix;
    }
6524
6525
    /**
     * Returns true if $str matches the supplied pattern, false otherwise.
     *
     * The pattern is wrapped in "/" delimiters and evaluated with the "u"
     * modifier, so it must be given without delimiters and modifiers.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against.</p>
     *
     * @return bool whether or not $str matches the pattern
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        $regex = '/' . $pattern . '/u';

        // preg_match() yields 1 on a match, 0 on no match and false on error.
        return \preg_match($regex, $str) === 1;
    }
6537
6538
    /**
     * Returns whether or not a character exists at an index. Offsets may be
     * negative to count from the last character in the string. Implements
     * part of the ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool whether or not the index exists
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $length = (int) self::strlen($str, $encoding);

        if ($offset < 0) {
            // -1 is the last character, so -$length is the first one.
            return \abs($offset) <= $length;
        }

        return $offset < $length;
    }
6560
6561
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string the character at the specified index
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Measure in the requested encoding, so the bounds check agrees with
        // the char_at() lookup below.
        $length = (int) self::strlen($str, $encoding);

        if (
            ($index >= 0 && $length <= $index)
            ||
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6590
6591
    /**
     * Pad a UTF-8 string to given length with another string.
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an int nor one of the supported strings
     *
     * @return string
     *                <p>The padded string; $str is returned unchanged when $pad_length
     *                is 0 or smaller than its length, or when $pad_string is empty.</p>
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_string === '' || $pad_length === 0) {
            return $str;
        }

        // Anything that is not an int has to be one of the textual aliases.
        if ($pad_type !== (int) $pad_type) {
            $aliases = [
                'left'  => \STR_PAD_LEFT,
                'right' => \STR_PAD_RIGHT,
                'both'  => \STR_PAD_BOTH,
            ];

            if (!\is_string($pad_type) || !isset($aliases[$pad_type])) {
                throw new \InvalidArgumentException(
                    'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }

            $pad_type = $aliases[$pad_type];
        }

        // The default encoding uses the native mb_* functions directly.
        $useNative = $encoding === 'UTF-8';

        if ($useNative) {
            $str_length = (int) \mb_strlen($str);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $str_length = (int) self::strlen($str, $encoding);
        }

        if ($pad_length < $str_length) {
            return $str;
        }

        // Character count of a string in the active encoding.
        $measure = static function (string $text) use ($useNative, $encoding): int {
            return $useNative
                ? (int) \mb_strlen($text)
                : (int) self::strlen($text, $encoding);
        };

        // First $chars characters of a string in the active encoding.
        $head = static function (string $text, int $chars) use ($useNative, $encoding): string {
            return (string) (
                $useNative
                    ? \mb_substr($text, 0, $chars)
                    : self::substr($text, 0, $chars, $encoding)
            );
        };

        $diff = $pad_length - $str_length;

        if ($pad_type === \STR_PAD_BOTH) {
            // An odd remainder goes to the right-hand side.
            $leftLength = (int) \floor($diff / 2);
            $rightLength = (int) \ceil($diff / 2);

            $pre = $head(\str_repeat($pad_string, $leftLength), $leftLength);
            $post = $head(\str_repeat($pad_string, $rightLength), $rightLength);

            return $pre . $str . $post;
        }

        // Repeat the pad string often enough, then trim it to the exact size.
        $repeats = (int) \ceil($diff / $measure($pad_string));
        $fill = $head(\str_repeat($pad_string, $repeats), $diff);

        if ($pad_type === \STR_PAD_LEFT) {
            return $fill . $str;
        }

        // STR_PAD_RIGHT and every other int value pad on the right.
        return $str . $fill;
    }
6753
6754
    /**
     * Returns a new string of a given length such that both sides of the
     * string are padded. Shortcut for str_pad() with STR_PAD_BOTH.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with padding applied
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $padStr, \STR_PAD_BOTH, $encoding);

        return $padded;
    }
6773
6774
    /**
     * Returns a new string of a given length such that the beginning of the
     * string is padded. Shortcut for str_pad() with STR_PAD_LEFT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with left padding
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $padStr, \STR_PAD_LEFT, $encoding);

        return $padded;
    }
6793
6794
    /**
     * Returns a new string of a given length such that the end of the string
     * is padded. Shortcut for str_pad() with STR_PAD_RIGHT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string with right padding
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $padStr, \STR_PAD_RIGHT, $encoding);

        return $padded;
    }
6813
6814
    /**
     * Repeat a string.
     *
     * The input is passed through self::filter() before it is repeated.
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of times the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @return string the repeated string
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
6838
6839
    /**
     * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param mixed $search  <p>
     *                       The value being searched for, otherwise known as the needle.
     *                       An array may be used to designate multiple needles.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value that replaces found search
     *                       values. An array may be used to designate multiple replacements.
     *                       </p>
     * @param mixed $subject <p>
     *                       The string or array being searched and replaced on,
     *                       otherwise known as the haystack.
     *                       </p>
     *                       <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles
     *
     * @return mixed this function returns a string or an array with the replaced values
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        /**
         * @psalm-suppress PossiblyNullArgument
         */
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
6884
6885
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * The comparison is byte-wise. An empty $search never matches a prefix;
     * in that case $replacement is appended to $str instead.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_beginning(string $str, string $search, string $replacement): string
    {
        // Covers the empty-$str cases too: '' . $replacement is $replacement.
        if ($search === '') {
            return $str . $replacement;
        }

        $searchLength = \strlen($search);

        // Only the leading bytes matter; a shorter $str can never match.
        if (\strncmp($str, $search, $searchLength) !== 0) {
            return $str;
        }

        return $replacement . \substr($str, $searchLength);
    }
6916
6917
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * The comparison is byte-wise. An empty $search never matches a suffix;
     * in that case $replacement is appended to $str instead.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_replace_ending(string $str, string $search, string $replacement): string
    {
        // Covers the empty-$str cases too: '' . $replacement is $replacement.
        if ($search === '') {
            return $str . $replacement;
        }

        $searchLength = \strlen($search);

        // A longer $search can never be a suffix. Checking this up front avoids
        // handing strpos() an offset outside of $str, which raises a warning
        // on PHP 7 and a ValueError on PHP 8.
        if ($searchLength > \strlen($str)) {
            return $str;
        }

        if (\substr($str, -$searchLength) === $search) {
            return \substr($str, 0, -$searchLength) . $replacement;
        }

        return $str;
    }
6948
6949
    /**
     * Swap the first occurrence of "$search" inside "$subject" for "$replace".
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The text that takes its place.</p>
     * @param string $subject <p>The string to work on.</p>
     *
     * @return string
     *                <p>The modified string, or "$subject" untouched when "$search" was not found.</p>
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(string $search, string $replace, string $subject): string
    {
        $firstPos = self::strpos($subject, $search);

        // nothing found -> nothing to replace
        if ($firstPos === false) {
            return $subject;
        }

        $searchLength = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $firstPos, $searchLength);
    }
6978
6979
    /**
     * Swap the last occurrence of "$search" inside "$subject" for "$replace".
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The text that takes its place.</p>
     * @param string $subject <p>The string to work on.</p>
     *
     * @return string
     *                <p>The modified string, or "$subject" untouched when "$search" was not found.</p>
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $lastPos = self::strrpos($subject, $search);

        // nothing found -> nothing to replace
        if ($lastPos === false) {
            return $subject;
        }

        $searchLength = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $lastPos, $searchLength);
    }
7010
7011
    /**
     * Shuffles all the characters in the string.
     *
     * PS: uses random algorithm which is weak for cryptography purposes
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shuffled string. Strings shorter than two characters are returned as-is.</p>
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $length = (int) \mb_strlen($str);

            // nothing to permute (also avoids the descending range(0, -1))
            if ($length < 2) {
                return $str;
            }

            $indexes = \range(0, $length - 1);
            /** @noinspection NonSecureShuffleUsageInspection */
            \shuffle($indexes);

            // init
            $shuffledStr = '';

            foreach ($indexes as $i) {
                $tmpSubStr = \mb_substr($str, $i, 1);
                if ($tmpSubStr !== false) {
                    $shuffledStr .= $tmpSubStr;
                }
            }

            return $shuffledStr;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $length = (int) self::strlen($str, $encoding);

        // nothing to permute (also avoids the descending range(0, -1))
        if ($length < 2) {
            return $str;
        }

        $indexes = \range(0, $length - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($indexes);

        // init
        $shuffledStr = '';

        foreach ($indexes as $i) {
            $tmpSubStr = self::substr($str, $i, 1, $encoding);
            if ($tmpSubStr !== false) {
                $shuffledStr .= $tmpSubStr;
            }
        }

        return $shuffledStr;
    }
7057
7058
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. Negative $start / $end values are counted from the
     * end of the string.
     *
     * @param string $str
     * @param int    $start    <p>Initial index from which to begin extraction.</p>
     * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      <p>The extracted substring; an empty string when the normalized $end is not
     *                      behind the normalized $start. Otherwise the result of the underlying
     *                      substr call is returned unchanged.</p>
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        $isUtf8 = $encoding === 'UTF-8';
        if ($isUtf8 === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $strLength = $isUtf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        if ($end === null) {
            // no end index: take everything from $start on (a negative $start is handled by the substr call)
            return $isUtf8
                ? \mb_substr($str, $start, $strLength)
                : self::substr($str, $start, $strLength, $encoding);
        }

        // Translate negative indexes into absolute positions, so that the length below is
        // always "end - start" and can never be misread as a negative substr length.
        if ($start < 0) {
            $start = \max(0, $strLength + $start);
        }
        if ($end < 0) {
            $end = \max(0, $strLength + $end);
        }

        if ($end <= $start) {
            return '';
        }

        return $isUtf8
            ? \mb_substr($str, $start, $end - $start)
            : self::substr($str, $start, $end - $start, $encoding);
    }
7107
7108
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Dashes and whitespace become "_", every digit is wrapped in "_", every uppercase
     * letter is lowercased and prefixed with "_"; repeated "_" are collapsed and
     * leading / trailing "_" are removed.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string in snake_case
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // digits and uppercase letters (no "|" inside the class: it would match a literal pipe)
            '/([\d\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $matchInt = (int) $match;

                // a plain digit: separate it on both sides
                if ((string) $matchInt === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        $str = (string) \preg_replace(
            [
                '/\s+/u', // convert spaces to "_"
                '/_+/',   // remove double "_"
            ],
            '_',
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
7172
7173
    /**
     * Order the characters of a string by their code points.
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>If <strong>true</strong>, every character is kept only once.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, the highest code point comes first.</p>
     *
     * @return string string of sorted characters
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $codePoints = self::codepoints($str);

        if ($unique) {
            // flipping twice collapses duplicate values
            $codePoints = \array_flip(\array_flip($codePoints));
        }

        // both sort functions keep the keys and reorder the array in place
        $desc ? \arsort($codePoints) : \asort($codePoints);

        return self::string($codePoints);
    }
7198
7199
    /**
     * Convert a string to an array of Unicode characters.
     *
     * @param int|int[]|string|string[] $str                <p>The string to split into array. An array is
     *                                                      processed element by element.</p>
     * @param int                       $length             [optional] <p>Max character length of each array
     *                                                      element.</p>
     * @param bool                      $cleanUtf8          [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool                      $tryToUseMbFunction [optional] <p>Set to false, if you don't want to use
     *                                                      "mb_substr"</p>
     *
     * @return array
     *               <p>An array containing chunks of the input; an empty array for an empty string
     *               or a $length below 1.</p>
     */
    public static function str_split(
        $str,
        int $length = 1,
        bool $cleanUtf8 = false,
        bool $tryToUseMbFunction = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        if (\is_array($str) === true) {
            foreach ($str as $k => $v) {
                $str[$k] = self::str_split(
                    $v,
                    $length,
                    $cleanUtf8,
                    $tryToUseMbFunction
                );
            }

            return $str;
        }

        // init
        $str = (string) $str;

        if ($str === '') {
            return [];
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (
            $tryToUseMbFunction === true
            &&
            self::$SUPPORT['mbstring'] === true
        ) {
            $iMax = \mb_strlen($str);
            if ($iMax <= 127) {
                // short strings: character-wise via "mb_substr"
                $ret = [];
                for ($i = 0; $i < $iMax; ++$i) {
                    $ret[] = \mb_substr($str, $i, 1);
                }
            } else {
                // longer strings: one regex pass
                $retArray = [];
                \preg_match_all('/./us', $str, $retArray);
                $ret = $retArray[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $retArray = [];
            \preg_match_all('/./us', $str, $retArray);
            $ret = $retArray[0] ?? [];
        } else {

            // fallback: decode the UTF-8 lead bytes by hand,
            // bytes that do not form a complete 1-4 byte sequence are skipped

            $ret = [];
            $len = \strlen($str);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($str[$i] & "\x80") === "\x00") {
                    // 1 byte (ASCII)
                    $ret[] = $str[$i];
                } elseif (
                    isset($str[$i + 1])
                    &&
                    ($str[$i] & "\xE0") === "\xC0"
                ) {
                    // 2 byte sequence
                    if (($str[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $str[$i] . $str[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($str[$i + 2])
                    &&
                    ($str[$i] & "\xF0") === "\xE0"
                ) {
                    // 3 byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($str[$i + 3])
                    &&
                    ($str[$i] & "\xF8") === "\xF0"
                ) {
                    // 4 byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            // The callback takes its argument by value: "array_map" never passes references,
            // and a by-reference parameter triggers a warning on PHP 8.
            return \array_map(
                static function (array $item): string {
                    return \implode('', $item);
                },
                \array_chunk($ret, $length)
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
7338
7339
    /**
     * Splits the string with the provided pattern, returning an array of
     * strings. An optional integer $limit will truncate the results.
     *
     * NOTE(review): with "mbstring" the pattern is handed to "mb_split" as a regular
     * expression, while the fallback quotes it and therefore treats it literally -
     * confirm which behaviour is intended.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @return string[]
     *                  <p>An array of strings; an empty array when $limit is 0 or the split fails.</p>
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $result = \mb_split($pattern, $str);

            // "mb_split" reports failure with false, which must not leak out of an ": array" method
            if ($result === false) {
                return [];
            }

            // $limit is never 0 here, so a positive value simply truncates the result list
            if ($limit > 0) {
                return \array_slice($result, 0, $limit);
            }

            return $result;
        }

        // request one extra piece, so that the unsplit remainder can be dropped afterwards
        if ($limit > 0) {
            ++$limit;
        } else {
            $limit = -1;
        }

        $array = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);

        if ($array === false) {
            return [];
        }

        if ($limit > 0 && \count($array) === $limit) {
            \array_pop($array);
        }

        return $array;
    }
7400
7401
    /**
     * Check if the string starts with the given substring.
     *
     * The comparison is byte-wise and case-sensitive; an empty $needle always matches.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // only the first bytes matter, so do not scan the whole haystack via strpos()
        return \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
7421
7422
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool
     *              <p>Whether or not $str starts with one of the $substrings; always false for an
     *              empty $str or an empty list.</p>
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // iterate by value: nothing is modified, so no reference is left dangling
        foreach ($substrings as $substring) {
            if (self::str_starts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
7450
7451
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>Everything behind the first separator; an empty string when $str or $separator
     *                is empty or the separator does not occur.</p>
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // Use the class' own "substr" (like the other separator helpers) instead of
        // handing the raw $encoding value straight to "mb_substr".
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
7490
7491
    /**
     * Fetch the part of a string that follows the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>Everything behind the last separator; an empty string when $str or $separator
     *                is empty or the separator does not occur.</p>
     */
    public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $isUtf8 = $encoding === 'UTF-8';

        $lastPos = $isUtf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);

        if ($lastPos === false) {
            return '';
        }

        if ($isUtf8) {
            return (string) \mb_substr($str, $lastPos + (int) \mb_strlen($separator));
        }

        $separatorLength = (int) self::strlen($separator, $encoding);

        return (string) self::substr($str, $lastPos + $separatorLength, null, $encoding);
    }
7530
7531
    /**
     * Fetch the part of a string that precedes the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>Everything in front of the first separator; an empty string when $str or
     *                $separator is empty or the separator does not occur.</p>
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $isUtf8 = $encoding === 'UTF-8';

        $firstPos = $isUtf8
            ? \mb_strpos($str, $separator)
            : self::strpos($str, $separator, 0, $encoding);

        if ($firstPos === false) {
            return '';
        }

        if ($isUtf8) {
            return (string) \mb_substr($str, 0, $firstPos);
        }

        return (string) self::substr($str, 0, $firstPos, $encoding);
    }
7574
7575
    /**
     * Fetch the part of a string that precedes the last occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>Everything in front of the last separator; an empty string when $str or
     *                $separator is empty or the separator does not occur.</p>
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $isUtf8 = $encoding === 'UTF-8';

        $lastPos = $isUtf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);

        if ($lastPos === false) {
            return '';
        }

        if ($isUtf8) {
            return (string) \mb_substr($str, 0, $lastPos);
        }

        // the encoding is normalized only for the final "substr" call
        $normalizedEncoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr($str, 0, $lastPos, $normalizedEncoding);
    }
7617
7618
    /**
     * Fetch the part of a string starting at the first occurrence of "$needle" (needle included),
     * or - with "$beforeNeedle" - the part in front of that occurrence.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>The requested part; an empty string when $str or $needle is empty or the
     *                needle does not occur.</p>
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // "mb_strstr" defaults its third argument to false, so one call covers both modes
        $part = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $beforeNeedle)
            : self::strstr($str, $needle, $beforeNeedle, $encoding);

        if ($part === false) {
            return '';
        }

        return $part;
    }
7662
7663
    /**
     * Fetch the part of a string starting at the last occurrence of "$needle" (needle included),
     * or - with "$beforeNeedle" - the part in front of that occurrence.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     *                <p>The requested part; an empty string when $str or $needle is empty or the
     *                needle does not occur.</p>
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // "mb_strrchr" defaults its third argument to false, so one call covers both modes
        $part = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $beforeNeedle)
            : self::strrchr($str, $needle, $beforeNeedle, $encoding);

        if ($part === false) {
            return '';
        }

        return $part;
    }
7707
7708
    /**
     * Wrap $str in the given substring.
     *
     * @param string $str
     * @param string $substring <p>The substring to put in front of and behind $str.</p>
     *
     * @return string string with the substring both prepended and appended
     */
    public static function str_surround(string $str, string $substring): string
    {
        return \implode('', [$substring, $str, $substring]);
    }
7720
7721
    /**
     * Capitalize the first letter of every word and lowercase the rest of the word.
     * The input is trimmed first (unless disabled); words listed in $ignore are left untouched.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                [optional] <p>An array of words not to capitalize or null.
     *                                                   Default: null</p>
     * @param string              $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null         $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool                $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     * @param bool                $useTrimFirst          [optional] <p>true === trim the input string, first</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false,
        bool $useTrimFirst = true
    ): string {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($useTrimFirst === true) {
            $str = \trim($str);
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // the plain "mb_*" functions are enough, as long as no language / length handling is requested
        $plainMbCasing = $lang === null && $tryToKeepStringLength === false;

        $titleizeWord = static function (array $match) use ($tryToKeepStringLength, $lang, $ignore, $plainMbCasing, $encoding): string {
            $word = $match[0];

            if ($ignore !== null && \in_array($word, $ignore, true)) {
                return $word;
            }

            if ($plainMbCasing !== true) {
                $lowered = self::strtolower(
                    $word,
                    $encoding,
                    false,
                    $lang,
                    $tryToKeepStringLength
                );

                return self::ucfirst(
                    $lowered,
                    $encoding,
                    false,
                    $lang,
                    $tryToKeepStringLength
                );
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper(\mb_substr($word, 0, 1))
                       . \mb_strtolower(\mb_substr($word, 1));
            }

            return \mb_strtoupper(\mb_substr($word, 0, 1, $encoding), $encoding)
                   . \mb_strtolower(\mb_substr($word, 1, null, $encoding), $encoding);
        };

        // every run of non-whitespace characters counts as one word
        return (string) \preg_replace_callback('/([\S]+)/u', $titleizeWord, $str);
    }
7796
7797
    /**
7798
     * Returns a trimmed string in proper title case.
7799
     *
7800
     * Also accepts an array, $ignore, allowing you to list words not to be
7801
     * capitalized.
7802
     *
7803
     * Adapted from John Gruber's script.
7804
     *
7805
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
7806
     *
7807
     * @param string $str
7808
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
7809
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
7810
     *
7811
     * @return string the titleized string
7812
     */
7813 35
    public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
    {
        // Built-in "small words" are regex fragments (note the lookarounds that
        // protect "Q&A" and "AT&T"); the caller's $ignore list is appended as-is.
        // NOTE(review): $ignore entries are not passed through preg_quote(), so
        // they are interpreted as regex fragments too — confirm this is intended.
        $smallWords = \array_merge(
            [
                '(?<!q&)a',
                'an',
                'and',
                'as',
                'at(?!&t)',
                'but',
                'by',
                'en',
                'for',
                'if',
                'in',
                'of',
                'on',
                'or',
                'the',
                'to',
                'v[.]?',
                'via',
                'vs[.]?',
            ],
            $ignore
        );

        // alternation of all small words, reused by every pattern below
        $smallWordsRx = \implode('|', $smallWords);
        // optional apostrophe (straight or typographic) followed by lower-case letters, e.g. "'s"
        $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

        $str = \trim($str);

        // a string without any lower-case character (e.g. all caps) is lower-cased
        // first, so that the word patterns below can re-capitalize it
        if (self::has_lowercase($str) === false) {
            $str = self::strtolower($str, $encoding);
        }

        // the main substitutions
        $str = (string) \preg_replace_callback(
            '~\b (_*) (?:                                                              # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \b                                                           # 6. With trailing underscore
                    ~ux',
            /**
             * Rebuilds one matched token; exactly one of the groups 2-5 is expected to be non-empty.
             *
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                // preserve leading underscore
                $str = $matches[1];
                if ($matches[2]) {
                    // preserve URLs, domains, emails and file paths
                    $str .= $matches[2];
                } elseif ($matches[3]) {
                    // lower-case small words
                    $str .= self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // capitalize word w/o internal caps
                    $str .= static::str_upper_first($matches[4], $encoding);
                } else {
                    // preserve other kinds of word (iPhone)
                    $str .= $matches[5];
                }
                // Preserve trailing underscore
                $str .= $matches[6];

                return $str;
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \A [[:punct:]]*                # start of title...
                      |  [:.;?!][ ]+               # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )       # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \b # ...followed by small word
                     ~uxi',
            /**
             * Keeps the leading context ($matches[1]) and upper-cases the first char of the small word.
             *
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . static::str_upper_first($matches[2], $encoding);
            },
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi',
            /**
             * Upper-cases the first char of a small word that closes the title / a subphrase.
             *
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return static::str_upper_first($matches[1], $encoding);
            },
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            /**
             * Upper-cases the first char of a small word that starts a hyphenated compound.
             *
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return static::str_upper_first($matches[1], $encoding);
            },
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        // NOTE(review): the lookbehind in the pattern below tests for the ellipsis
        // character ("…"), while its inline comment describes a hyphen (compare
        // "(?<! -)" in the previous pattern) — confirm which one is intended.
        // The negative lookahead contains a literal tab, which the "x" flag ignores.
        $str = (string) \preg_replace_callback(
            '~\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi',
            /**
             * Keeps the first word + hyphen and upper-cases the first char of the trailing small word.
             *
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . static::str_upper_first($matches[2], $encoding);
            },
            $str
        );

        return $str;
    }
7966
7967
    /**
7968
     * Get a binary representation of a specific string.
7969
     *
7970
     * @param string $str <p>The input string.</p>
7971
     *
7972
     * @return string
7973
     */
7974 2
    public static function str_to_binary(string $str): string
    {
        // Convert byte by byte instead of routing the whole hex dump through
        // "base_convert()": that function works on int/float internally and
        // silently loses precision once the input is longer than a few bytes.
        $bits = '';
        $byteCount = \strlen($str);
        for ($i = 0; $i < $byteCount; ++$i) {
            $bits .= \str_pad(\decbin(\ord($str[$i])), 8, '0', \STR_PAD_LEFT);
        }

        // Keep the historical output format: the result is a plain binary number,
        // i.e. without leading zeros, and "0" for empty / all-zero input.
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
7980
7981
    /**
7982
     * @param string   $str
7983
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
7984
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
7985
     *
7986
     * @return string[]
7987
     */
7988 17
    public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
    {
        // an empty input is either "no lines" or "one empty line", depending on the flag
        if ($str === '') {
            return $removeEmptyValues === true ? [] : [''];
        }

        // Split on exactly ONE line terminator: "\r\n" (tried first, so a Windows
        // line ending is never counted twice), a lone "\r" or a lone "\n".
        // The former "[\r\n]{1,2}" swallowed "\n\n" as a single separator and
        // therefore dropped blank lines for LF input while keeping them for CRLF.
        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $return = \mb_split('\r\n|\r|\n', $str);
        } else {
            $return = \preg_split("/\r\n|\r|\n/u", $str);
        }

        // the split itself failed (e.g. the regex engine rejected the input)
        if ($return === false) {
            return $removeEmptyValues === true ? [] : [''];
        }

        // no filtering requested: hand back the raw lines
        if (
            $removeShortValues === null
            &&
            $removeEmptyValues === false
        ) {
            return $return;
        }

        return self::reduce_string_array(
            $return,
            $removeEmptyValues,
            $removeShortValues
        );
    }
8019
8020
    /**
8021
     * Convert a string into an array of words.
8022
     *
8023
     * @param string   $str
8024
     * @param string   $charList          <p>Additional chars for the definition of "words".</p>
8025
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
8026
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
8027
     *
8028
     * @return string[]
8029
     */
8030 13
    public static function str_to_words(
        string $str,
        string $charList = '',
        bool $removeEmptyValues = false,
        int $removeShortValues = null
    ): array {
        // result used for "nothing to split" and for a failed split
        $fallback = $removeEmptyValues === true ? [] : [''];

        if ($str === '') {
            return $fallback;
        }

        // character class describing what counts as part of a word
        $charList = self::rxClass($charList, '\pL');

        // The word pattern is the delimiter AND is captured, so the words
        // themselves are part of the returned list, next to the text between them.
        $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $fallback;
        }

        // no filtering requested: return the raw split result
        if ($removeEmptyValues === false && $removeShortValues === null) {
            return $parts;
        }

        // filter, then make sure every remaining entry is a string
        return \array_map(
            static function ($part): string {
                return (string) $part;
            },
            self::reduce_string_array($parts, $removeEmptyValues, $removeShortValues)
        );
    }
8067
8068
    /**
8069
     * alias for "UTF8::to_ascii()"
8070
     *
8071
     * @param string $str
8072
     * @param string $unknown
8073
     * @param bool   $strict
8074
     *
8075
     * @return string
8076
     *
8077
     * @see UTF8::to_ascii()
8078
     */
8079 8
    public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
    {
        // pure alias: all arguments are forwarded unchanged to "to_ascii()"
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
8083
8084
    /**
8085
     * Truncates the string to a given length. If $substring is provided, and
8086
     * truncating occurs, the string is further truncated so that the substring
8087
     * may be appended without exceeding the desired length.
8088
     *
8089
     * @param string $str
8090
     * @param int    $length    <p>Desired length of the truncated string.</p>
8091
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
8092
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
8093
     *
8094
     * @return string string after truncating
8095
     */
8096 22
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // short enough already: nothing to truncate, nothing to append
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            // reserve room for the appended substring
            $length -= (int) \mb_strlen($substring);

            // No room left for any part of $str. Without this guard a negative
            // length would make "mb_substr()" cut from the END of the string and
            // the result would be longer than requested. Same outcome as in
            // "str_truncate_safe()": only the substring is returned.
            if ($length <= 0) {
                return $substring;
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length) . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        // reserve room for the appended substring
        $length -= (int) self::strlen($substring, $encoding);

        // see the UTF-8 branch: never pass a non-positive length on
        if ($length <= 0) {
            return $substring;
        }

        return (string) self::substr($str, 0, $length, $encoding) . $substring;
    }
8141
8142
    /**
8143
     * Truncates the string to a given length, while ensuring that it does not
8144
     * split words. If $substring is provided, and truncating occurs, the
8145
     * string is further truncated so that the substring may be appended without
8146
     * exceeding the desired length.
8147
     *
8148
     * @param string $str
8149
     * @param int    $length                          <p>Desired length of the truncated string.</p>
8150
     * @param string $substring                       [optional] <p>The substring to append if it can fit. Default:
8151
     *                                                ''</p>
8152
     * @param string $encoding                        [optional] <p>Default: 'UTF-8'</p>
8153
     * @param bool   $ignoreDoNotSplitWordsForOneWord [optional] <p>Default: false</p>
8154
     *
8155
     * @return string string after truncating
8156
     */
8157 47
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignoreDoNotSplitWordsForOneWord = false
    ): string {
        // nothing to cut or no room at all: only the substring is left
        if ($length <= 0 || $str === '') {
            return $substring;
        }

        // "UTF-8" (exact spelling) goes straight to the mb_* functions,
        // everything else is normalized and handled by the class' own helpers
        $useMb = $encoding === 'UTF-8';
        if ($useMb === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $totalLength = $useMb
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);
        if ($length >= $totalLength) {
            return $str;
        }

        // need to further trim the string so we can append the substring
        $length -= $useMb
            ? (int) \mb_strlen($substring)
            : (int) self::strlen($substring, $encoding);
        if ($length <= 0) {
            return $substring;
        }

        $head = $useMb
            ? \mb_substr($str, 0, $length)
            : self::substr($str, 0, $length, $encoding);
        if ($head === false) {
            return '';
        }

        // A space exactly at the cut position means the cut fell on a word
        // boundary; in every other case the last word may have been split.
        $nextSpace = $useMb
            ? \mb_strpos($str, ' ', $length - 1)
            : self::strpos($str, ' ', $length - 1, $encoding);

        if ($nextSpace !== $length) {
            // last space inside the already truncated part
            $lastSpace = $useMb
                ? \mb_strrpos($head, ' ', 0)
                : self::strrpos($head, ' ', 0, $encoding);

            $cutBack = $lastSpace !== false
                       || ($nextSpace !== false && $ignoreDoNotSplitWordsForOneWord === false);

            if ($cutBack) {
                // drop the partial word (a missing space casts to position 0)
                $head = $useMb
                    ? (string) \mb_substr($head, 0, (int) $lastSpace)
                    : (string) self::substr($head, 0, (int) $lastSpace, $encoding);
            }
        }

        return $head . $substring;
    }
8236
8237
    /**
8238
     * Returns a lowercase and trimmed string separated by underscores.
8239
     * Underscores are inserted before uppercase characters (with the exception
8240
     * of the first character of the string), and in place of spaces as well as
8241
     * dashes.
8242
     *
8243
     * @param string $str
8244
     *
8245
     * @return string the underscored string
8246
     */
8247 16
    public static function str_underscored(string $str): string
    {
        // "underscored" is simply "delimited" with "_" as the delimiter
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
8251
8252
    /**
8253
     * Returns an UpperCamelCase version of the supplied string. It trims
8254
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
8255
     * and underscores, and removes spaces, dashes, underscores.
8256
     *
8257
     * @param string      $str                   <p>The input string.</p>
8258
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
8259
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
8260
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
8261
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
8262
     *
8263
     * @return string string in UpperCamelCase
8264
     */
8265 13
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // step 1: regular camelCase
        $camelized = self::str_camelize($str, $encoding);

        // step 2: upper-case the very first character
        return self::ucfirst(
            $camelized,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );
    }
8274
8275
    /**
8276
     * alias for "UTF8::ucfirst()"
8277
     *
8278
     * @param string      $str
8279
     * @param string      $encoding
8280
     * @param bool        $cleanUtf8
8281
     * @param string|null $lang
8282
     * @param bool        $tryToKeepStringLength
8283
     *
8284
     * @return string
8285
     *
8286
     * @see UTF8::ucfirst()
8287
     */
8288 39
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // pure alias: every argument is handed to "ucfirst()" in the same order
        return self::ucfirst(
            $str,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );
    }
8297
8298
    /**
8299
     * Counts number of words in the UTF-8 string.
8300
     *
8301
     * @param string $str      <p>The input string.</p>
8302
     * @param int    $format   [optional] <p>
8303
     *                         <strong>0</strong> => return a number of words (default)<br>
8304
     *                         <strong>1</strong> => return an array of words<br>
8305
     *                         <strong>2</strong> => return an array of words with word-offset as key
8306
     *                         </p>
8307
     * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
8308
     *
8309
     * @return int|string[] The number of words in the string
8310
     */
8311 2
    public static function str_word_count(string $str, int $format = 0, string $charlist = '')
    {
        // "str_to_words()" splits with the word pattern as captured delimiter:
        // odd indexes hold the words, even indexes the text between them.
        $pieces = self::str_to_words($str, $charlist);
        $pieceCount = \count($pieces);

        // format 1: plain list of words
        if ($format === 1) {
            $words = [];
            $idx = 1;
            while ($idx < $pieceCount) {
                $words[] = $pieces[$idx];
                $idx += 2;
            }

            return $words;
        }

        // format 2: words keyed by their character offset in the input
        if ($format === 2) {
            $wordsByOffset = [];
            $position = (int) self::strlen($pieces[0]);
            for ($idx = 1; $idx < $pieceCount; $idx += 2) {
                $word = $pieces[$idx];
                $wordsByOffset[$position] = $word;
                // skip the word itself plus the separator that follows it
                $position += (int) self::strlen($word) + (int) self::strlen($pieces[$idx + 1]);
            }

            return $wordsByOffset;
        }

        // default: number of words only
        return (int) (($pieceCount - 1) / 2);
    }
8335
8336
    /**
8337
     * Case-insensitive string comparison.
8338
     *
8339
     * INFO: Case-insensitive version of UTF8::strcmp()
8340
     *
8341
     * @param string $str1     <p>The first string.</p>
8342
     * @param string $str2     <p>The second string.</p>
8343
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
8344
     *
8345
     * @return int
8346
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
8347
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
8348
     *             <strong>0</strong> if they are equal
8349
     */
8350 23
    public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        // case-fold both operands with identical settings ...
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        // ... then delegate to the case-sensitive comparison
        return self::strcmp($folded1, $folded2);
    }
8357
8358
    /**
8359
     * alias for "UTF8::strstr()"
8360
     *
8361
     * @param string $haystack
8362
     * @param string $needle
8363
     * @param bool   $before_needle
8364
     * @param string $encoding
8365
     * @param bool   $cleanUtf8
8366
     *
8367
     * @return false|string
8368
     *
8369
     * @see UTF8::strstr()
8370
     */
8371 2
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        // pure alias: the result of "strstr()" is passed through untouched
        $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

        return $result;
    }
8380
8381
    /**
8382
     * Case-sensitive string comparison.
8383
     *
8384
     * @param string $str1 <p>The first string.</p>
8385
     * @param string $str2 <p>The second string.</p>
8386
     *
8387
     * @return int
8388
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
8389
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
8390
     *             <strong>0</strong> if they are equal
8391
     */
8392 29
    public static function strcmp(string $str1, string $str2): int
    {
        // byte-identical strings are equal, no normalization needed
        if ($str1 === $str2) {
            return 0;
        }

        // Compare the canonically decomposed (NFD) forms, so that differently
        // composed but equivalent strings are treated alike.
        // "Normalizer::normalize()" returns false when it cannot process the
        // input (e.g. invalid UTF-8); handing that to "\strcmp()" would raise a
        // TypeError under strict types, so fall back to the raw string instead.
        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        if ($normalized1 === false) {
            $normalized1 = $str1;
        }

        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);
        if ($normalized2 === false) {
            $normalized2 = $str2;
        }

        return \strcmp($normalized1, $normalized2);
    }
8403
8404
    /**
8405
     * Find length of initial segment not matching mask.
8406
     *
8407
     * @param string $str
8408
     * @param string $charList
8409
     * @param int    $offset
8410
     * @param int    $length
8411
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
8412
     *
8413
     * @return int
8414
     */
8415 12
    public static function strcspn(
        string $str,
        string $charList,
        int $offset = null,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        // "UTF-8" and "CP850" are used as given, anything else is normalized
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // without a mask nothing can match: the whole string is the segment
        if ($charList === '') {
            return (int) self::strlen($str, $encoding);
        }

        // restrict the search to the requested window first
        if ($offset !== null || $length !== null) {
            if ($encoding !== 'UTF-8') {
                /** @noinspection UnnecessaryCastingInspection */
                $window = self::substr($str, (int) $offset, $length, $encoding);
            } elseif ($length === null) {
                /** @noinspection UnnecessaryCastingInspection */
                $window = \mb_substr($str, (int) $offset);
            } else {
                /** @noinspection UnnecessaryCastingInspection */
                $window = \mb_substr($str, (int) $offset, $length);
            }

            if ($window === false) {
                return 0;
            }

            $str = $window;
        }

        if ($str === '') {
            return 0;
        }

        // lazily capture everything in front of the first char from the mask
        $found = [];
        $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
        if (!\preg_match($pattern, $str, $found)) {
            // no char of the mask present: the whole string counts
            return (int) self::strlen($str, $encoding);
        }

        $segmentLength = self::strlen($found[1], $encoding);

        return $segmentLength === false ? 0 : $segmentLength;
    }
8467
8468
    /**
8469
     * alias for "UTF8::stristr()"
8470
     *
8471
     * @param string $haystack
8472
     * @param string $needle
8473
     * @param bool   $before_needle
8474
     * @param string $encoding
8475
     * @param bool   $cleanUtf8
8476
     *
8477
     * @return false|string
8478
     *
8479
     * @see UTF8::stristr()
8480
     */
8481 1
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        // pure alias: the result of "stristr()" is passed through untouched
        $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

        return $result;
    }
8490
8491
    /**
8492
     * Create a UTF-8 string from code points.
8493
     *
8494
     * INFO: opposite to UTF8::codepoints()
8495
     *
8496
     * @param array $array <p>Integer or Hexadecimal codepoints.</p>
8497
     *
8498
     * @return string UTF-8 encoded string
8499
     */
8500 4
    public static function string(array $array): string
    {
        // turn every code point into its character and glue them together
        $result = '';
        foreach ($array as $codePoint) {
            $result .= self::chr($codePoint);
        }

        return $result;
    }
8513
8514
    /**
8515
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
8516
     *
8517
     * @param string $str <p>The input string.</p>
8518
     *
8519
     * @return bool
8520
     *              <strong>true</strong> if the string has BOM at the start,<br>
8521
     *              <strong>false</strong> otherwise
8522
     */
8523 6
    public static function string_has_bom(string $str): bool
    {
        // Only the keys of self::$BOM (the BOM byte sequences) are needed here.
        // Iterating over the keys by value avoids the former by-reference loop
        // over the shared static array, which left a dangling reference behind.
        foreach (\array_keys(self::$BOM) as $bomString) {
            $bomString = (string) $bomString;

            // pure prefix comparison: looks at the first bytes only instead of
            // searching the whole string for the BOM like "strpos()" does
            if (\strncmp($str, $bomString, \strlen($bomString)) === 0) {
                return true;
            }
        }

        return false;
    }
8534
8535
    /**
8536
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
8537
     *
8538
     * @see http://php.net/manual/en/function.strip-tags.php
8539
     *
8540
     * @param string $str            <p>
8541
     *                               The input string.
8542
     *                               </p>
8543
     * @param string $allowable_tags [optional] <p>
8544
     *                               You can use the optional second parameter to specify tags which should
8545
     *                               not be stripped.
8546
     *                               </p>
8547
     *                               <p>
8548
     *                               HTML comments and PHP tags are also stripped. This is hardcoded and
8549
     *                               can not be changed with allowable_tags.
8550
     *                               </p>
8551
     * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
8552
     *
8553
     * @return string the stripped string
8554
     */
8555 4
    public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
    {
        // nothing to strip
        if ($str === '') {
            return '';
        }

        // optionally remove invalid UTF-8 before the tags are stripped
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // the second argument is only passed on when the caller supplied one
        return $allowable_tags === null
            ? \strip_tags($str)
            : \strip_tags($str, $allowable_tags);
    }
8571
8572
    /**
8573
     * Strip all whitespace characters. This includes tabs and newline
8574
     * characters, as well as multibyte whitespace such as the thin space
8575
     * and ideographic space.
8576
     *
8577
     * @param string $str
8578
     *
8579
     * @return string
8580
     */
8581 36
    public static function strip_whitespace(string $str): string
    {
        // empty input: skip the regex engine altogether
        return $str === ''
            ? ''
            : (string) \preg_replace('/[[:space:]]+/u', '', $str);
    }
8589
8590
    /**
     * Case-insensitive search for the first occurrence of a string within another.
     *
     * Lookup order:
     * 1. "mb_stripos()" when mbstring support is flagged,
     * 2. "grapheme_stripos()" for UTF-8 with a non-negative offset when intl support is flagged
     *    (only a successful match is returned, a miss falls through),
     * 3. native "stripos()" when haystack and needle are pure ASCII,
     * 4. both strings are case-folded via UTF8::strtocasefold() and handed to UTF8::strpos().
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle    <p>The string to find in haystack.</p>
     * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     *                   (also <strong>false</strong> if haystack or needle is empty)
     */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding !== 'UTF-8') {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                return \mb_stripos($haystack, $needle, $offset, $encoding);
            }

            return \mb_stripos($haystack, $needle, $offset);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // INFO: "grapheme_stripos()" can't handle other encodings or a negative offset
        if ($encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true) {
            $graphemePos = \grapheme_stripos($haystack, $needle, $offset);
            if ($graphemePos !== false) {
                return $graphemePos;
            }
        }

        // fallback for ascii only
        if (self::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        // fallback via vanilla php: compare the case-folded variants
        $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $foldedNeedle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($foldedHaystack, $foldedNeedle, $offset, $encoding);
    }
8665
8666
    /**
     * Returns all of haystack starting from and including the first occurrence of needle to the end
     * (case-insensitive).
     *
     * Lookup order: "mb_stristr()" when mbstring support is flagged, then "grapheme_stristr()"
     * (UTF-8 + intl only, a miss falls through), then native "stristr()" for pure ASCII input
     * and finally a regex based fallback.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string a sub-string,<br>or <strong>false</strong> if needle is not found
     *                      (also <strong>false</strong> if haystack or needle is passed in empty)
     */
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // Cleaning may have emptied the needle. Compare strictly against '':
        // a loose "!$needle" check would also treat the valid needle "0" as empty
        // and wrongly return the whole haystack.
        if ($needle === '') {
            return $haystack;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        // fallback for ascii only
        if (self::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        // fallback via regex: lazily capture everything in front of the first
        // case-insensitive occurrence of the (quoted) needle
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // skip as many characters as were captured in front of the needle
        return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
    }
8749
8750
    /**
     * Get the string length in characters, not the byte-length!
     *
     * Lookup order: "mb_strlen()" when mbstring support is flagged, native "strlen()" for
     * the "CP850" / "ASCII" encodings, "iconv_strlen()", "grapheme_strlen()" (UTF-8 + intl only),
     * native "strlen()" for pure ASCII input and finally a regex based character count.
     *
     * @see http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str       <p>The string being checked for length.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
     *                   $encoding.
     *                   (One multi-byte character counted as +1).
     *                   <br>
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
     *                   chars.
     */
    public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($str === '') {
            return 0;
        }

        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strlen" and "\iconv_strlen" return a wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        // via mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strlen($str)
                : \mb_strlen($str, $encoding);
        }

        // binary || ascii only: one byte per character
        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strlen($str);
        }

        if (
            $encoding !== 'UTF-8'
            && self::$SUPPORT['mbstring'] === false
            && self::$SUPPORT['iconv'] === false
        ) {
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // via iconv
        if (self::$SUPPORT['iconv'] === true) {
            $length = \iconv_strlen($str, $encoding);
            if ($length !== false) {
                return $length;
            }
        }

        // via intl, INFO: "grapheme_strlen()" can't handle other encodings
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $length = \grapheme_strlen($str);
            if ($length !== null) {
                return $length;
            }
        }

        // ascii only
        if (self::is_ascii($str)) {
            return \strlen($str);
        }

        // vanilla php: count the characters matched in unicode mode
        \preg_match_all('/./us', $str, $parts);
        $length = \count($parts[0]);

        return $length === 0 ? false : $length;
    }
8864
8865
    /**
     * Get the length of a string in bytes.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return int <p>The number of bytes.</p>
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strlen($str);
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT encoding ...
        return \mb_strlen($str, 'CP850');
    }
8885
8886
    /**
     * Case-insensitive "natural order" comparison of two strings.
     *
     * INFO: natural order version of UTF8::strcasecmp(); both strings are case-folded
     *       via UTF8::strtocasefold() and then compared with UTF8::strnatcmp()
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
8907
8908
    /**
     * Compare two strings using a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcmp(); identical strings short-circuit to 0,
     *       otherwise both are passed through UTF8::strtonatfold() before the native comparison
     *
     * @see http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        if ($str1 !== $str2) {
            $folded1 = (string) self::strtonatfold($str1);
            $folded2 = (string) self::strtonatfold($str2);

            return \strnatcmp($folded1, $folded2);
        }

        return 0;
    }
8934
8935
    /**
     * Case-insensitive comparison of the first n characters of two strings.
     *
     * Both strings are case-folded via UTF8::strtocasefold() and then handed to UTF8::strncmp().
     *
     * @see http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        // NOTE(review): $encoding is only used for the case-folding, UTF8::strncmp() runs with its default
        return self::strncmp($folded1, $folded2, $len);
    }
8962
8963
    /**
     * Compare the first n characters of two strings.
     *
     * Both strings are cut to $len characters and the results are compared via UTF8::strcmp().
     *
     * @see http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($encoding !== 'UTF-8') {
            $head1 = (string) self::substr($str1, 0, $len, $encoding);
            $head2 = (string) self::substr($str2, 0, $len, $encoding);
        } else {
            // UTF-8: call "mb_substr()" directly
            $head1 = (string) \mb_substr($str1, 0, $len);
            $head2 = (string) \mb_substr($str2, 0, $len);
        }

        return self::strcmp($head1, $head2);
    }
8998
8999
    /**
     * Search a string for any of a set of characters.
     *
     * @see http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>This parameter is case sensitive.</p>
     *
     * @return false|string string starting from the character found, or false if it is not found
     *                      (also false if haystack or char_list is empty)
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($char_list === '' || $haystack === '') {
            return false;
        }

        // character class built by UTF8::rxClass() from the given char list
        $pattern = '/' . self::rxClass($char_list) . '/us';

        $found = \preg_match($pattern, $haystack, $m);
        if (!$found) {
            return false;
        }

        // cut the haystack at the first occurrence of the matched character
        $start = (int) \strpos($haystack, $m[0]);

        return \substr($haystack, $start);
    }
9021
9022
    /**
     * Find position of first occurrence of string in a string.
     *
     * Lookup order: "mb_strpos()" when mbstring support is flagged, native "strpos()" for the
     * "CP850" / "ASCII" encodings, "grapheme_strpos()" (UTF-8 + intl, non-negative offset only),
     * "iconv_strpos()" (non-negative offset only), native "strpos()" for pure ASCII input and
     * finally a byte search on the sliced haystack whose result is converted back into a
     * character position.
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string.<br> If needle is not found it returns false.
     */
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strpos($haystack, $needle, $offset);
            }

            return \mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        if ($offset < 0) {
            // A negative offset counts from the end of the haystack. Translate it into the
            // equivalent absolute offset first, so that the returned position stays relative
            // to the start of the original haystack (like "mb_strpos()" does) and not to
            // the start of the sliced tail.
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystackTmp === false) {
            $haystackTmp = '';
        }
        $haystackTail = (string) $haystackTmp;

        // byte position of the needle inside the sliced haystack
        $bytePos = \strpos($haystackTail, $needle);
        if ($bytePos === false) {
            return false;
        }

        if ($bytePos === 0) {
            return $offset;
        }

        // convert the byte position into a character position
        return $offset + (int) self::strlen(\substr($haystackTail, 0, $bytePos), $encoding);
    }
9169
9170
    /**
     * Find the byte position of the first occurrence of a string in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to search for in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @return false|int The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found (or haystack / needle
     *                   is empty), it returns false.
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strpos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT encoding ...
        return \mb_strpos($haystack, $needle, $offset, 'CP850');
    }
9199
9200
    /**
     * Finds the last occurrence of a character in a string within another.
     *
     * Lookup order: "mb_strrchr()" when mbstring support is flagged, native "strrchr()" for the
     * "CP850" / "ASCII" encodings (only if $before_needle is false), otherwise the position of
     * the first character of the needle is located via "iconv_strrpos()" or UTF8::strrpos()
     * and the haystack is cut at that position.
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or false if needle is not found
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // via mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        // binary || ascii only
        $isByteEncoding = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($before_needle === false && $isByteEncoding) {
            return \strrchr($haystack, $needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        $viaIconv = self::$SUPPORT['iconv'] === true;

        // both remaining strategies only search for the first character of the needle
        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        if ($viaIconv) {
            $pos = \iconv_strrpos($haystack, $needle, $encoding);
        } else {
            // vanilla php
            $pos = self::strrpos($haystack, $needle, 0, $encoding);
        }

        if ($pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $pos, $encoding)
            : self::substr($haystack, $pos, null, $encoding);
    }
9322
9323
    /**
     * Reverses characters order in the string.
     *
     * The string is passed through UTF8::emoji_encode() before and UTF8::emoji_decode()
     * after the reversal. For UTF-8 the units are read with "grapheme_substr()" when intl
     * support is flagged, otherwise with "mb_substr()"; other encodings use UTF8::substr().
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the string with characters in the reverse sequence
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::emoji_encode($str, true);
        $reversed = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            for ($i = (int) self::strlen($str, $encoding) - 1; $i >= 0; --$i) {
                $char = self::substr($str, $i, 1, $encoding);
                if ($char !== false) {
                    $reversed .= $char;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($i = (int) \grapheme_strlen($str) - 1; $i >= 0; --$i) {
                $char = \grapheme_substr($str, $i, 1);
                if ($char !== false) {
                    $reversed .= $char;
                }
            }
        } else {
            for ($i = (int) \mb_strlen($str) - 1; $i >= 0; --$i) {
                $char = \mb_substr($str, $i, 1);
                if ($char !== false) {
                    $reversed .= $char;
                }
            }
        }

        return self::emoji_decode($reversed, true);
    }
9375
9376
    /**
     * Finds the last occurrence of a character in a string within another, case insensitive.
     *
     * Uses "mb_strrichr()" when mbstring support is flagged. Otherwise the position of the
     * first character of the needle is located via UTF8::strripos() and the haystack is cut
     * at that position.
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or<br>false if needle is not found
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // via mbstring
        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        // vanilla php: only the first character of the needle is searched for
        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        $pos = self::strripos($haystack, $needle, 0, $encoding);
        if ($pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $pos, $encoding)
            : self::substr($haystack, $pos, null, $encoding);
    }
9451
9452
    /**
     * Locate the last occurrence of a needle inside a string, ignoring case.
     *
     * @param string     $haystack  <p>The string that is searched.</p>
     * @param int|string $needle    <p>The string to search for. A non-negative integer is converted
     *                              via UTF8::chr() before the search.</p>
     * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found (or haystack / needle is empty), it returns false.
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support an integer $needle, so convert it up front
        if ($needle === (int) $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // mb_strripos() && iconv_strripos() are not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // 1) mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // 2) binary || ascii only: the byte-wise native function is sufficient
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strripos($haystack, $needle, $offset);
        }

        if (
            self::$SUPPORT['mbstring'] === false
            &&
            $encoding !== 'UTF-8'
        ) {
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 3) intl
        //

        if (
            self::$SUPPORT['intl'] === true
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strripos() can't handle negative offset
        ) {
            $graphemePos = \grapheme_strripos($haystack, $needle, $offset);
            if ($graphemePos !== false) {
                return $graphemePos;
            }
        }

        //
        // 4) pure ascii input
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // 5) vanilla php: case-fold both strings, then search case-sensitive
        //

        $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding);
        $foldedNeedle = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($foldedHaystack, $foldedNeedle, $offset, $encoding, $cleanUtf8);
    }
9562
9563
    /**
     * Byte-wise, case-insensitive search for the last occurrence of a string within another.
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @return false|int return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found (or if haystack / needle is empty)
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strripos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so call it with an 8-bit charset
        return \mb_strripos($haystack, $needle, $offset, 'CP850'); // 8-BIT
    }
9594
9595
    /**
     * Locate the last occurrence of a needle inside a string.
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int
     *                              (converted via UTF8::chr()).</p>
     * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                              into the string. Negative values will stop searching at an arbitrary point prior to
     *                              the end of the string.
     *                              </p>
     * @param string     $encoding  [optional] <p>Set the charset.</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found (or haystack / needle is empty), it returns false.
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support an integer $needle, so convert it up front
        if ($needle === (int) $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // \mb_strrpos && iconv_strrpos are not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // 1) mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrpos($haystack, $needle, $offset)
                : \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // 2) binary || ascii only: the byte-wise native function is sufficient
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strrpos($haystack, $needle, $offset);
        }

        if (
            self::$SUPPORT['mbstring'] === false
            &&
            $encoding !== 'UTF-8'
        ) {
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 3) intl
        //

        if (
            self::$SUPPORT['intl'] === true
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strrpos() can't handle negative offset
        ) {
            $graphemePos = \grapheme_strrpos($haystack, $needle, $offset);
            if ($graphemePos !== false) {
                return $graphemePos;
            }
        }

        //
        // 4) pure ascii input
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // 5) vanilla php
        //

        // Cut the haystack down to the part that may be searched:
        // a positive offset drops the leading characters, a negative offset drops the trailing ones.
        $window = null;
        if ($offset > 0) {
            $window = self::substr($haystack, $offset);
        } elseif ($offset < 0) {
            $window = self::substr($haystack, 0, $offset);
            // nothing was removed from the front, so no correction of the result is needed
            $offset = 0;
        }

        if ($window !== null) {
            $haystack = $window === false ? '' : (string) $window;
        }

        // byte position of the last match ...
        $bytePos = \strrpos($haystack, $needle);
        if ($bytePos === false) {
            return false;
        }

        // ... converted into a character position by measuring everything in front of it
        $prefix = \substr($haystack, 0, $bytePos);
        if ($prefix === false) {
            return false;
        }

        return $offset + (int) self::strlen($prefix);
    }
9732
9733
    /**
     * Byte-wise search for the last occurrence of a string in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *
     * @return false|int The numeric position of the last occurrence of needle in the
     *                   haystack string. If needle is not found (or if haystack / needle is empty), it returns false.
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strrpos($haystack, $needle, $offset);
        }

        // "mb_" is available if overload is used, so call it with an 8-bit charset
        return \mb_strrpos($haystack, $needle, $offset, 'CP850'); // 8-BIT
    }
9763
9764
    /**
     * Measure the leading segment of a string that consists only of characters from a given mask.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $mask     <p>The mask of chars</p>
     * @param int    $offset   [optional] <p>If set (together with / or $length), only that sub-string of $str is
     *                         inspected.</p>
     * @param int    $length   [optional] <p>See $offset.</p>
     * @param string $encoding [optional] <p>Set the charset.</p>
     *
     * @return false|int
     *                   The length of the matching segment, 0 if $str (after slicing) or $mask is empty
     *                   or if nothing matches
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // reduce the input to the requested slice first
        if ($offset || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length === null) {
                $str = (string) \mb_substr($str, $offset);
            } else {
                $str = (string) \mb_substr($str, $offset, $length);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        // match the longest run of mask characters that is anchored at the start of the string
        $found = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
9807
9808
    /**
     * Return the part of the haystack that starts at the first occurrence of the needle
     * (or, optionally, the part in front of it).
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, strstr() returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      A sub-string,<br>or <strong>false</strong> if needle is not found
     *                      (or if haystack / needle is empty)
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // 1) mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // 2) binary || ascii only: the byte-wise native function is sufficient
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        if (
            self::$SUPPORT['mbstring'] === false
            &&
            $encoding !== 'UTF-8'
        ) {
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 3) intl
        //

        if (
            self::$SUPPORT['intl'] === true
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strstr()" can't handle other encodings
        ) {
            $graphemeResult = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($graphemeResult !== false) {
                return $graphemeResult;
            }
        }

        //
        // 4) pure ascii input
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // 5) vanilla php: capture (non-greedy) everything in front of the first needle
        //

        $pattern = '/^(.*?)' . \preg_quote($needle, '/') . '/us';
        \preg_match($pattern, $haystack, $found);

        if (!isset($found[1])) {
            return false;
        }

        $leading = $found[1];
        if ($before_needle) {
            return $leading;
        }

        // skip the leading part, the rest starts with the needle
        return self::substr($haystack, (int) self::strlen($leading));
    }
9916
9917
    /**
     * Byte-wise search for the first occurrence of a string within another.
     *
     * @param string $haystack      <p>
     *                              The string from which to get the first occurrence
     *                              of needle.
     *                              </p>
     * @param string $needle        <p>
     *                              The string to find in haystack.
     *                              </p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the first occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the first occurrence of needle to the end.
     *                              </p>
     *
     * @return false|string the portion of haystack,
     *                      or false if needle is not found (or if haystack / needle is empty)
     */
    public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strstr($haystack, $needle, $before_needle);
        }

        // "mb_" is available if overload is used, so call it with an 8-bit charset
        return \mb_strstr($haystack, $needle, $before_needle, 'CP850'); // 8-BIT
    }
9952
9953
    /**
     * Unicode transformation for case-less matching.
     *
     * The string is first passed through UTF8::fixStrCaseHelper() and afterwards
     * converted to lowercase or uppercase, depending on $lower.
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str       <p>The input string.</p>
     * @param bool        $full      [optional] <p>
     *                               <b>true</b>, replace full case folding chars (default)<br>
     *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                               </p>
     * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding  [optional] <p>Set the charset.</p>
     * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower     [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
     *                               is for some languages better ...</p>
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $cleanUtf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // drop invalid byte sequences before any case conversion happens
            $str = self::clean($str);
        }

        $str = self::fixStrCaseHelper($str, $lower, $full);

        // anything that needs a language or a non-default charset goes through the full-featured methods
        if ($lang !== null || $encoding !== 'UTF-8') {
            return $lower === true
                ? self::strtolower($str, $encoding, false, $lang)
                : self::strtoupper($str, $encoding, false, $lang);
        }

        // fast path: plain UTF-8 without a special language
        return $lower === true
            ? \mb_strtolower($str)
            : \mb_strtoupper($str);
    }
10005
10006
    /**
     * Convert a string to lowercase.
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                   <p>The string being lowercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // the parameter is untyped, so normalize it to a string first
        $str = (string) $str;
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // drop invalid byte sequences before the case conversion happens
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // fast path: plain UTF-8 without a special language
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang === null) {
            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        if (self::$SUPPORT['intl'] !== true) {
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        // language specific conversion via the intl transliterator
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $transliteratorId = $lang . '-Lower';
        if (!\in_array($transliteratorId, self::$INTL_TRANSLITERATOR_LIST, true)) {
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, \E_USER_WARNING);

            // unknown language: use the generic transliterator instead
            $transliteratorId = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        return (string) \transliterator_transliterate($transliteratorId, $str);
    }
10075
10076
    /**
     * Make a string uppercase.
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                   <p>The string being uppercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === pass the string through
     *                                           UTF8::fixStrCaseHelper() before the conversion, to try to keep
     *                                           the string length</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // init: the parameter is untyped, so normalize it to a string first
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // drop invalid byte sequences before the case conversion happens
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, false);
        }

        // fast path: plain UTF-8 without a special language
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $langCode = $lang . '-Upper';
                if (!\in_array($langCode, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl is available here, only the requested language is unknown
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, \E_USER_WARNING);

                    // use the generic transliterator instead
                    $langCode = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($langCode, $str);
            }

            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
10145
10146
    /**
     * Translate characters or replace sub-strings.
     *
     * <p>
     * If $to is not an empty string, $from and $to are split via UTF8::str_split(), the longer
     * list is cut to the length of the shorter one and the resulting pairs are applied via "strtr()".<br>
     * If $to is an empty string and $from is a string, every occurrence of $from is removed.<br>
     * If $to is an empty string and $from is an array, it is used as replace-pairs for "strtr()".
     * </p>
     *
     * @see http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from.</p>
     * @param string|string[] $to   [optional] <p>The string being translated to.</p>
     *
     * @throws \InvalidArgumentException if the replace-pairs cannot be combined
     *
     * @return string
     *                This function returns a copy of str, translating all occurrences of each character in from to the
     *                corresponding character in to
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // replacing something with itself is a no-op
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            $fromChars = self::str_split($from);
            $toChars = self::str_split($to);
            $countFrom = \count($fromChars);
            $countTo = \count($toChars);

            // both lists need the same length, so the longer one is truncated
            if ($countFrom > $countTo) {
                $fromChars = \array_slice($fromChars, 0, $countTo);
            } elseif ($countFrom < $countTo) {
                $toChars = \array_slice($toChars, 0, $countFrom);
            }

            // keep the split lists in their own variables, so that the error message
            // below can still show the values that could not be combined
            $pairs = \array_combine($fromChars, $toChars);
            if ($pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($fromChars, true) . ' | to: ' . \print_r($toChars, true) . ')');
            }

            $from = $pairs;
        }

        // only reachable with a string if $to is empty: remove all occurrences of $from
        if (\is_string($from)) {
            return \str_replace($from, '', $str);
        }

        return \strtr($str, $from);
    }
10193
10194
    /**
     * Calculate the display width of a string.
     *
     * <p>Without mbstring, every character matched by the wide-character ranges below counts
     * as two columns and every other character as one column.</p>
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return int
     */
    public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
    {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        //
        // 1) mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        //
        // 2) vanilla php
        //

        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip all wide characters and let preg_replace() count how many were removed
        $wideCount = 0;
        $narrowOnly = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wideCount);

        // wide characters take two columns, the remaining characters one column each
        $narrowCount = (int) self::strlen($narrowOnly, 'UTF-8');

        return ($wideCount * 2) + $narrowCount;
    }
10244
10245
    /**
10246
     * Get part of a string.
10247
     *
10248
     * @see http://php.net/manual/en/function.mb-substr.php
10249
     *
10250
     * @param string $str       <p>The string being checked.</p>
10251
     * @param int    $offset    <p>The first position used in str.</p>
10252
     * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
10253
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
10254
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10255
     *
10256
     * @return false|string
10257
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
10258
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
10259
     *                      characters long, <b>FALSE</b> will be returned.
10260
     */
10261 172
    public static function substr(
10262
        string $str,
10263
        int $offset = 0,
10264
        int $length = null,
10265
        string $encoding = 'UTF-8',
10266
        bool $cleanUtf8 = false
10267
    ) {
10268
        // empty string
10269 172
        if ($str === '' || $length === 0) {
10270 8
            return '';
10271
        }
10272
10273 168
        if ($cleanUtf8 === true) {
10274
            // iconv and mbstring are not tolerant to invalid encoding
10275
            // further, their behaviour is inconsistent with that of PHP's substr
10276 2
            $str = self::clean($str);
10277
        }
10278
10279
        // whole string
10280 168
        if (!$offset && $length === null) {
10281 7
            return $str;
10282
        }
10283
10284 163
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
10285 19
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
10286
        }
10287
10288
        //
10289
        // fallback via mbstring
10290
        //
10291
10292 163
        if (self::$SUPPORT['mbstring'] === true) {
10293 161
            if ($encoding === 'UTF-8') {
10294 161
                if ($length === null) {
10295 64
                    return \mb_substr($str, $offset);
10296
                }
10297
10298 102
                return \mb_substr($str, $offset, $length);
10299
            }
10300
10301
            return self::substr($str, $offset, $length, $encoding);
10302
        }
10303
10304
        //
10305
        // fallback for binary || ascii only
10306
        //
10307
10308
        if (
10309 4
            $encoding === 'CP850'
10310
            ||
10311 4
            $encoding === 'ASCII'
10312
        ) {
10313
            if ($length === null) {
10314
                return \substr($str, $offset);
10315
            }
10316
10317
            return \substr($str, $offset, $length);
10318
        }
10319
10320
        // otherwise we need the string-length
10321 4
        $str_length = 0;
10322 4
        if ($offset || $length === null) {
10323 4
            $str_length = self::strlen($str, $encoding);
10324
        }
10325
10326
        // e.g.: invalid chars + mbstring not installed
10327 4
        if ($str_length === false) {
10328
            return false;
10329
        }
10330
10331
        // empty string
10332 4
        if ($offset === $str_length && !$length) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $length of type integer|null is loosely compared to false; this is ambiguous if the integer can be 0. You might want to explicitly use === null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
10333
            return '';
10334
        }
10335
10336
        // impossible
10337 4
        if ($offset && $offset > $str_length) {
10338
            return '';
10339
        }
10340
10341 4
        if ($length === null) {
10342 4
            $length = (int) $str_length;
10343
        } else {
10344 2
            $length = (int) $length;
10345
        }
10346
10347
        if (
10348 4
            $encoding !== 'UTF-8'
10349
            &&
10350 4
            self::$SUPPORT['mbstring'] === false
10351
        ) {
10352 2
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
10353
        }
10354
10355
        //
10356
        // fallback via intl
10357
        //
10358
10359
        if (
10360 4
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
10361
            &&
10362 4
            $offset >= 0 // grapheme_substr() can't handle negative offset
10363
            &&
10364 4
            self::$SUPPORT['intl'] === true
10365
        ) {
10366
            $returnTmp = \grapheme_substr($str, $offset, $length);
10367
            if ($returnTmp !== false) {
10368
                return $returnTmp;
10369
            }
10370
        }
10371
10372
        //
10373
        // fallback via iconv
10374
        //
10375
10376
        if (
10377 4
            $length >= 0 // "iconv_substr()" can't handle negative length
10378
            &&
10379 4
            self::$SUPPORT['iconv'] === true
10380
        ) {
10381
            $returnTmp = \iconv_substr($str, $offset, $length);
10382
            if ($returnTmp !== false) {
10383
                return $returnTmp;
10384
            }
10385
        }
10386
10387
        //
10388
        // fallback for ascii only
10389
        //
10390
10391 4
        if (self::is_ascii($str)) {
10392
            return \substr($str, $offset, $length);
10393
        }
10394
10395
        //
10396
        // fallback via vanilla php
10397
        //
10398
10399
        // split to array, and remove invalid characters
10400 4
        $array = self::str_split($str);
10401
10402
        // extract relevant part, and join to make string again
10403 4
        return \implode('', \array_slice($array, $offset, $length));
10404
    }
10405
10406
    /**
     * Binary safe comparison of two strings from an offset, up to length characters.
     *
     * When an offset or a length is given, $str1 is first reduced to the requested
     * part and $str2 is then cut down to the same number of characters, so that only
     * the overlapping range is compared.
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
     *                                     of the length of the str compared to the length of main_str less the
     *                                     offset.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);

                // measure $str1 in the same charset that was used to cut it,
                // otherwise $str2 is trimmed to a length counted in the wrong encoding
                $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1, $encoding), $encoding);
            }
        }

        if ($case_insensitivity === true) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
10459
10460
    /**
     * Count the number of substring occurrences.
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack  <p>The string to search in.</p>
     * @param string   $needle    <p>The substring to search for.</p>
     * @param int      $offset    [optional] <p>The offset where to start counting.</p>
     * @param int|null $length    [optional] <p>
     *                            The maximum length after the specified offset to search for the
     *                            substring. A negative value is counted from the end of the haystack.
     *                            </p>
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The number of occurrences, or <strong>false</strong> if the haystack or the
     *                   needle is empty, or if the haystack length cannot be determined
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // invalid byte sequences would shift the character positions used below
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);

            // cleaning may have removed everything; an empty needle must never reach "mb_substr_count()"
            if ($haystack === '' || $needle === '') {
                return false;
            }
        }

        // Reduce the haystack to the requested window. A negative $length has to be
        // honoured even when $offset is 0, so test for "given" instead of "positive".
        if ($offset !== 0 || $length !== null) {
            if ($length === null) {
                $lengthTmp = self::strlen($haystack, $encoding);
                if ($lengthTmp === false) {
                    return false;
                }
                $length = (int) $lengthTmp;
            }

            if ($encoding === 'UTF-8') {
                $haystack = (string) \mb_substr($haystack, $offset, $length);
            } else {
                $haystack = (string) \mb_substr($haystack, $offset, $length, $encoding);
            }
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_substr_count($haystack, $needle);
            }

            return \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback without mbstring: count the non-overlapping matches of the quoted needle
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
10541
10542
    /**
     * Count the number of substring occurrences, working on bytes instead of characters.
     *
     * Without "mbstring.func_overload" the call is handed straight to the native
     * "substr_count()". With the overload active, the haystack is sliced manually
     * and counted via "mb_substr_count()" in an 8-bit charset.
     *
     * @param string   $haystack <p>The string being checked.</p>
     * @param string   $needle   <p>The string being found.</p>
     * @param int      $offset   [optional] <p>The offset where to start counting.</p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring.
     *                           </p>
     *
     * @return false|int
     *                   The number of times the needle occurs in the haystack;
     *                   <strong>0</strong> if the haystack or the needle is empty
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        // no overload: the native function already counts bytes
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return $length === null
                ? \substr_count($haystack, $needle, $offset)
                : \substr_count($haystack, $needle, $offset, $length);
        }

        if ($offset || $length !== null) {
            if ($length === null) {
                $haystackLength = self::strlen($haystack);
                if ($haystackLength === false) {
                    return false;
                }
                $length = (int) $haystackLength;
            }

            $windowIsEmptyOrInverted = $length !== 0
                                       && $offset !== 0
                                       && ($length + $offset) <= 0;

            // output from "substr_count()" have changed in PHP 7.1
            if ($windowIsEmptyOrInverted && Bootup::is_php('7.1') === false) {
                return false;
            }

            $slice = \substr($haystack, $offset, $length);
            $haystack = $slice === false ? '' : (string) $slice;
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
    }
10619
10620
    /**
     * Counts how often $substring occurs in $str.
     *
     * The search is case-sensitive unless $caseSensitive is set to false. An empty
     * input string or an empty substring always yields 0.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $substring     <p>The substring to search for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $caseSensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($str === '' || $substring === '') {
            return 0;
        }

        // decided before normalisation: only the literal 'UTF-8' takes the short path
        $isUtf8 = $encoding === 'UTF-8';
        if (!$isUtf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($caseSensitive) {
            return $isUtf8
                ? (int) \mb_substr_count($str, $substring)
                : (int) \mb_substr_count($str, $substring, $encoding);
        }

        if ($isUtf8) {
            $upperStr = \mb_strtoupper($str);
            $upperSubstring = \mb_strtoupper($substring);

            return (int) \mb_substr_count($upperStr, $upperSubstring);
        }

        $foldedStr = self::strtocasefold($str, true, false, $encoding, null, false);
        $foldedSubstring = self::strtocasefold($substring, true, false, $encoding, null, false);

        return (int) \mb_substr_count($foldedStr, $foldedSubstring, $encoding);
    }
10666
10667
    /**
     * Strips the prefix $needle from the start of $haystack, ignoring case.
     *
     * The haystack is returned untouched when it is empty, when the needle is
     * empty, or when it does not start with the needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @return string the haystack without the leading needle
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // nothing to strip from, or nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_istarts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $prefixLength = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefixLength);
    }
10691
10692
    /**
     * Get part of a string, processed in bytes.
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first position used in str.</p>
     * @param int|null $length [optional] <p>The maximum length of the returned string; null means
     *                         "up to the end of the string".</p>
     *
     * @return false|string
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
     *                      bytes long, the native "substr()" result is passed through
     *                      (<b>FALSE</b> before PHP 8.0, an empty string since).
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        // Omit the length argument instead of passing a 32-bit magic number:
        // a fixed 2147483647 would truncate larger strings on 64-bit builds.
        if ($length === null) {
            return \substr($str, $offset);
        }

        return \substr($str, $offset, $length);
    }
10723
10724
    /**
     * Strips the suffix $needle from the end of $haystack, ignoring case.
     *
     * The haystack is returned untouched when it is empty, when the needle is
     * empty, or when it does not end with the needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     *
     * @return string the haystack without the trailing needle
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // nothing to strip from, or nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_iends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // keep everything in front of the matched suffix
        $keepLength = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $keepLength);
    }
10748
10749
    /**
     * Strips the prefix $needle from the start of $haystack (case-sensitive).
     *
     * The haystack is returned untouched when it is empty, when the needle is
     * empty, or when it does not start with the needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The prefix to remove.</p>
     *
     * @return string the haystack without the leading needle
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // nothing to strip from, or nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_starts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $prefixLength = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefixLength);
    }
10773
10774
    /**
     * Replace text within a portion of a string.
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * If $str is an array, every element is processed on its own and the other
     * arguments are expanded (or cut) to the same number of elements; the given
     * $encoding is applied to every element.
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given, then it will default to strlen(
     *                                     string ); i.e. end the replacing at the end of string. Of course, if
     *                                     length is zero then this function will have the effect of inserting
     *                                     replacement into string at the given start offset.</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \InvalidArgumentException if $offset or $length is an array while $str is not
     *
     * @return string|string[] The result string is returned. If string is an array then array is returned.
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str) === true) {
            $num = \count($str);

            // the replacement
            if (\is_array($replacement) === true) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset (non-integer entries fall back to 0)
            if (\is_array($offset) === true) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$valueTmp) {
                    $valueTmp = (int) $valueTmp === $valueTmp ? $valueTmp : 0;
                }
                unset($valueTmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length
            if ($length === null) {
                // NOTE(review): for array input a missing length becomes 0 (pure insertion),
                // unlike the scalar path below where null means "up to the end" - kept as-is.
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length) === true) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$valueTmpV2) {
                    $valueTmpV2 = (int) $valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
                }
                unset($valueTmpV2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call per element; the closure forwards $encoding, which a plain
            // callable passed to "array_map()" would silently reset to the default
            return \array_map(
                static function ($strTmp, $replacementTmp, $offsetTmp, $lengthTmp) use ($encoding) {
                    return self::substr_replace($strTmp, $replacementTmp, $offsetTmp, $lengthTmp, $encoding);
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        // scalar $str: only the first replacement of an array is used
        if (\is_array($replacement) === true) {
            if (\count($replacement) > 0) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length) === true) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset) === true) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, $string_length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // translate a negative / missing / oversized length into a plain character count
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // head + replacement + tail
            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into single characters
        \preg_match_all('/./us', $str, $smatches);
        \preg_match_all('/./us', $replacement, $rmatches);

        if ($length === null) {
            $lengthTmp = self::strlen($str, $encoding);
            if ($lengthTmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $lengthTmp;
        }

        \array_splice($smatches[0], $offset, $length, $rmatches[0]);

        return \implode('', $smatches[0]);
    }
10924
10925
    /**
     * Strips the suffix $needle from the end of $haystack (case-sensitive).
     *
     * The suffix test is a plain byte comparison; only the cutting itself is done
     * character-wise in the given encoding.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the haystack without the trailing needle
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to strip from, or nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        $endsWithNeedle = \substr($haystack, -\strlen($needle)) === $needle;
        if (!$endsWithNeedle) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            $keepLength = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keepLength);
        }

        $keepLength = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $keepLength, $encoding);
    }
10966
10967
    /**
     * Returns the string with the case of every character swapped.
     *
     * The result is built by XOR-ing the lower-cased, the upper-cased and the
     * original string with each other.
     *
     * NOTE(review): the XOR works on the raw strings, so it presumably relies on the
     * lower- and upper-cased variants having the same byte layout as the input -
     * confirm for characters whose case mapping changes the byte length.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string each character's case swapped
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // drop invalid byte sequences before any case conversion
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            $lowered = \mb_strtolower($str);
            $raised = \mb_strtoupper($str);
        } else {
            $lowered = self::strtolower($str, $encoding);
            $raised = self::strtoupper($str, $encoding);
        }

        return (string) ($lowered ^ $raised ^ $str);
    }
10994
10995
    /**
     * Checks whether symfony-polyfills are used.
     *
     * A polyfill is assumed when the "mbstring" or the "iconv" extension is not
     * loaded although one of its functions ("mb_strlen" / "iconv") exists.
     *
     * @return bool
     *              <strong>true</strong> if in use, <strong>false</strong> otherwise
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstringIsPolyfilled = \extension_loaded('mbstring') === false
                                && \function_exists('mb_strlen');

        $iconvIsPolyfilled = \extension_loaded('iconv') === false
                             && \function_exists('iconv');

        return $mbstringIsPolyfilled || $iconvIsPolyfilled;
    }
11018
11019
    /**
     * Replaces every tab character in the string with $tabLength spaces.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength [optional] <p>Number of spaces that replace one tab. Default: 4</p>
     *
     * @return string the string with all tabs expanded
     */
    public static function tabs_to_spaces(string $str, int $tabLength = 4): string
    {
        $indent = \str_repeat(' ', $tabLength);

        return \str_replace("\t", $indent, $str);
    }
11037
11038
    /**
     * Converts the first character of each word in the string to uppercase
     * and all other chars to lowercase.
     *
     * Without a language and without $tryToKeepStringLength the conversion is done
     * by "mb_convert_case()"; otherwise it is delegated to "UTF8::str_titleize()".
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string with all characters of $str being title-cased
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            // drop invalid byte sequences before any case conversion
            $str = self::clean($str);
        }

        $needsSpecialCasing = $lang !== null || $tryToKeepStringLength !== false;
        if ($needsSpecialCasing) {
            return self::str_titleize($str, null, $encoding, false, $lang, $tryToKeepStringLength, false);
        }

        if ($encoding === 'UTF-8') {
            return \mb_convert_case($str, \MB_CASE_TITLE);
        }

        $normalizedEncoding = self::normalize_encoding($encoding, 'UTF-8');

        return \mb_convert_case($str, \MB_CASE_TITLE, $normalizedEncoding);
    }
11075
11076
    /**
     * Deprecated camelCase alias that forwards all arguments to "UTF8::to_ascii()".
     *
     * @param string $str       <p>The input string.</p>
     * @param string $subst_chr [optional] <p>Passed on as second argument of "UTF8::to_ascii()".</p>
     * @param bool   $strict    [optional] <p>Passed on as third argument of "UTF8::to_ascii()".</p>
     *
     * @return string
     *
     * @see UTF8::to_ascii()
     * @deprecated <p>use "UTF8::to_ascii()"</p>
     */
    public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
11092
11093
    /**
     * Deprecated camelCase alias that forwards its argument to "UTF8::to_iso8859()".
     *
     * @param string|string[] $str <p>The input string or an array of strings.</p>
     *
     * @return string|string[]
     *
     * @see UTF8::to_iso8859()
     * @deprecated <p>use "UTF8::to_iso8859()"</p>
     */
    public static function toIso8859($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
11107
11108
    /**
     * Deprecated camelCase alias that forwards its argument to "UTF8::to_latin1()".
     *
     * @param string|string[] $str <p>The input string or an array of strings.</p>
     *
     * @return string|string[]
     *
     * @see UTF8::to_latin1()
     * @deprecated <p>use "UTF8::to_latin1()"</p>
     */
    public static function toLatin1($str)
    {
        $converted = self::to_latin1($str);

        return $converted;
    }
11122
11123
    /**
     * Deprecated camelCase alias that forwards its argument to "UTF8::to_utf8()".
     *
     * @param string|string[] $str <p>The input string or an array of strings.</p>
     *
     * @return string|string[]
     *
     * @see UTF8::to_utf8()
     * @deprecated <p>use "UTF8::to_utf8()"</p>
     */
    public static function toUTF8($str)
    {
        $converted = self::to_utf8($str);

        return $converted;
    }
11137
11138
    /**
     * Convert a string into ASCII.
     *
     * Processing order:
     * <ol>
     * <li>Empty and already-ASCII input is returned as is.</li>
     * <li>The string is passed through "UTF8::clean()" and checked for ASCII again.</li>
     * <li>With "$strict" and the intl extension, ICU transliteration is tried first.</li>
     * <li>Every remaining non-ASCII character is decoded to its code point and looked up in
     * the per-bank ("x%02x") transliteration tables; characters without a mapping become
     * "$unknown".</li>
     * </ol>
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @return string
     */
    public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
    {
        // per-request cache of the transliteration tables, keyed by bank (code point >> 8)
        static $UTF8_TO_ASCII;

        if ($str === '') {
            return '';
        }

        // check if we only have ASCII, first (better performance)
        if (self::is_ascii($str) === true) {
            return $str;
        }

        $str = self::clean(
            $str,
            true,
            true,
            true,
            false,
            true,
            true
        );

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
            return $str;
        }

        if (
            $strict === true
            &&
            self::$SUPPORT['intl'] === true
        ) {
            // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
            /** @noinspection PhpComposerExtensionStubsInspection */
            $transliterated = \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

            // "transliterator_transliterate()" returns false on failure: keep the cleaned
            // string and fall through to the table lookup instead of collapsing it to ''
            if ($transliterated !== false) {
                $str = $transliterated;

                // check again, if we only have ASCII, now ...
                if (self::is_ascii($str) === true) {
                    return $str;
                }
            }
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        // split the string into single (multi-byte) characters
        \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
        $chars = $ar[0];

        foreach ($chars as &$c) {
            // reset for every character, so that a sequence we cannot decode is replaced
            // by "$unknown" and never re-uses the code point of the previous character
            $ord = null;

            $ordC0 = self::$ORD[$c[0]];

            // ASCII - next please
            if ($ordC0 >= 0 && $ordC0 <= 127) {
                continue;
            }

            $ordC1 = self::$ORD[$c[1]];

            // 2-byte sequence
            if ($ordC0 >= 192 && $ordC0 <= 223) {
                $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
            }

            if ($ordC0 >= 224) {
                $ordC2 = self::$ORD[$c[2]];

                // 3-byte sequence
                if ($ordC0 <= 239) {
                    $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
                }

                if ($ordC0 >= 240) {
                    $ordC3 = self::$ORD[$c[3]];

                    // 4-byte sequence
                    if ($ordC0 <= 247) {
                        $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
                    }

                    if ($ordC0 >= 248) {
                        $ordC4 = self::$ORD[$c[4]];

                        // 5-byte sequence (legacy form)
                        if ($ordC0 <= 251) {
                            $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
                        }

                        if ($ordC0 >= 252) {
                            $ordC5 = self::$ORD[$c[5]];

                            // 6-byte sequence (legacy form)
                            if ($ordC0 <= 253) {
                                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
                            }
                        }
                    }
                }
            }

            // 0xFE / 0xFF lead bytes and anything that could not be decoded
            if ($ordC0 === 254 || $ordC0 === 255 || $ord === null) {
                $c = $unknown;

                continue;
            }

            // the tables are split into banks of 256 code points, loaded on first use
            $bank = $ord >> 8;
            if (!isset($UTF8_TO_ASCII[$bank])) {
                $UTF8_TO_ASCII[$bank] = self::getDataIfExists(\sprintf('x%02x', $bank));
                if ($UTF8_TO_ASCII[$bank] === false) {
                    $UTF8_TO_ASCII[$bank] = [];
                }
            }

            $newchar = $ord & 255;

            // no entry in the bank => use the replacement character
            $c = $UTF8_TO_ASCII[$bank][$newchar] ?? $unknown;
        }
        // break the reference left behind by the by-reference loop
        unset($c);

        return \implode('', $chars);
    }
11298
11299
    /**
     * Interpret a value as boolean, based on its string representation.
     *
     * Evaluation order:
     * <ol>
     * <li>The empty string is false.</li>
     * <li>"true", "1", "on", "yes" are true and "false", "0", "off", "no" are false
     * (exact match first, then lower-cased via "strtolower()").</li>
     * <li>Any other numeric string is true only if its float value is greater than zero.</li>
     * <li>Everything else is the boolean value of the trimmed string.</li>
     * </ol>
     *
     * @param mixed $str <p>The value to check, it is cast to string first.</p>
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        $value = (string) $str;

        if ($value === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $keywords = [
            'true'  => true,
            '1'     => true,
            'on'    => true,
            'yes'   => true,
            'false' => false,
            '0'     => false,
            'off'   => false,
            'no'    => false,
        ];

        // try the value as given first, then its lower-cased form
        foreach ([$value, \strtolower($value)] as $candidate) {
            if (isset($keywords[$candidate])) {
                return $keywords[$candidate];
            }
        }

        if (\is_numeric($value)) {
            return ((float) $value + 0) > 0;
        }

        return (bool) \trim($value);
    }
11340
11341
    /**
     * Convert given string to safe filename (and keep string case).
     *
     * Without transliteration every character that is not "a-z", "A-Z", "0-9", ".", "-",
     * whitespace or the fallback character is removed. Whitespace runs are then replaced by
     * "$fallback_char", repeated fallback characters are collapsed and the fallback character
     * is trimmed from both ends.
     *
     * @param string $string            <p>The input string.</p>
     * @param bool   $use_transliterate [optional] <p>true === run "UTF8::str_transliterate()" first (default is
     *                                  false, so unsafe characters are simply removed).</p>
     * @param string $fallback_char     [optional] <p>Replacement for whitespace; an empty string removes the
     *                                  whitespace instead.</p>
     *
     * @return string
     */
    public static function to_filename(string $string, bool $use_transliterate = false, string $fallback_char = '-'): string
    {
        if ($use_transliterate === true) {
            $string = self::str_transliterate($string, $fallback_char);
        }

        $fallback_char_escaped = \preg_quote($fallback_char, '/');

        $patterns = [
            '/[^' . $fallback_char_escaped . '\.\-a-zA-Z0-9\s]/', // 1) remove un-needed chars
            '/[\s]+/u',                                           // 2) convert spaces to $fallback_char
        ];
        $replacements = [
            '',
            $fallback_char,
        ];

        // an empty fallback would build the invalid pattern "/[]+/u", which makes
        // "preg_replace()" fail and turns the whole result into an empty string
        if ($fallback_char !== '') {
            $patterns[] = '/[' . $fallback_char_escaped . ']+/u'; // 3) remove double $fallback_char's
            $replacements[] = $fallback_char;
        }

        $string = (string) \preg_replace($patterns, $replacements, $string);

        // trim "$fallback_char" from beginning and end of the string
        return \trim($string, $fallback_char);
    }
11376
11377
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * Arrays are converted element by element (recursively, keys are kept); every other
     * value is cast to string and passed to "UTF8::utf8_decode()".
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        $str = (string) $str;

        return $str === '' ? '' : self::utf8_decode($str);
    }
11401
11402
    /**
     * Same as "UTF8::to_iso8859()", offered under the name "Latin-1".
     *
     * @param string|string[] $str <p>A string or an array of strings; forwarded unchanged.</p>
     *
     * @return string|string[] whatever "UTF8::to_iso8859()" returns for the input
     *
     * @see UTF8::to_iso8859()
     */
    public static function to_latin1($str)
    {
        return self::to_iso8859($str);
    }
11415
11416
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>Byte sequences that look like 2-, 3- or 4-byte UTF-8 (lead byte followed by the matching
     * number of bytes in the range 0x80-0xBF) are copied unchanged.</li>
     * <li>Every other byte >= 0x80 is passed to "UTF8::to_utf8_convert_helper()".</li>
     * <li>Unicode escape sequences ("\uXXXX", including surrogate pairs) are decoded afterwards.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * @param string|string[] $str                    <p>Any string or array (arrays are converted element by
     *                                                element, recursively).</p>
     * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @return string|string[] the UTF-8 encoded string; an empty string if decoding the escape sequences fails
     */
    public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
    {
        // arrays: convert every element in place (the array itself is a by-value copy)
        if (\is_array($str) === true) {
            foreach ($str as $k => &$v) {
                $v = self::to_utf8($v, $decodeHtmlEntityToUtf8);
            }

            return $str;
        }

        $str = (string) $str;
        if ($str === '') {
            return $str;
        }

        // walk the string byte by byte; "\x00" stands in for bytes beyond the end of the string,
        // so that a truncated sequence fails the continuation-byte check
        $max = \strlen($str);
        $buf = '';

        for ($i = 0; $i < $max; ++$i) {
            $c1 = $str[$i];

            if ($c1 >= "\xC0") { // should be converted to UTF8, if it's not UTF8 already

                if ($c1 <= "\xDF") { // looks like 2 bytes UTF8

                    $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];

                    if ($c2 >= "\x80" && $c2 <= "\xBF") { // yeah, almost sure it's UTF8 already
                        $buf .= $c1 . $c2;
                        ++$i;
                    } else { // not valid UTF8 - convert it
                        $buf .= self::to_utf8_convert_helper($c1);
                    }
                } elseif ($c1 >= "\xE0" && $c1 <= "\xEF") { // looks like 3 bytes UTF8

                    $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
                    $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];

                    if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF") { // yeah, almost sure it's UTF8 already
                        $buf .= $c1 . $c2 . $c3;
                        $i += 2;
                    } else { // not valid UTF8 - convert it
                        $buf .= self::to_utf8_convert_helper($c1);
                    }
                } elseif ($c1 >= "\xF0" && $c1 <= "\xF7") { // looks like 4 bytes UTF8

                    $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
                    $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];
                    $c4 = $i + 3 >= $max ? "\x00" : $str[$i + 3];

                    if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF" && $c4 >= "\x80" && $c4 <= "\xBF") { // yeah, almost sure it's UTF8 already
                        $buf .= $c1 . $c2 . $c3 . $c4;
                        $i += 3;
                    } else { // not valid UTF8 - convert it
                        $buf .= self::to_utf8_convert_helper($c1);
                    }
                } else { // doesn't look like UTF8, but should be converted

                    $buf .= self::to_utf8_convert_helper($c1);
                }
            } elseif (($c1 & "\xC0") === "\x80") { // stray continuation byte (0x80-0xBF) - needs conversion

                $buf .= self::to_utf8_convert_helper($c1);
            } else { // it doesn't need conversion

                $buf .= $c1;
            }
        }

        // decode unicode escape sequences + unicode surrogate pairs
        // (groups 1+2 = high/low surrogate of a pair, group 3 = single "\uXXXX" escape)
        $buf = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    $cp = (int) \hexdec($matches[3]);
                } else {
                    // combine the surrogate pair into one code point
                    // http://unicode.org/faq/utf_bom.html#utf16-4
                    $cp = ((int) \hexdec($matches[1]) << 10)
                          + (int) \hexdec($matches[2])
                          + 0x10000
                          - (0xD800 << 10)
                          - 0xDC00;
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($cp < 0x80) {
                    return (string) self::chr($cp);
                }

                // NOTE(review): 0x80-0x9F is assembled from two "self::chr()" calls with hand-built
                // lead / continuation byte values - confirm that "self::chr()" returns raw bytes here
                if ($cp < 0xA0) {
                    /** @noinspection UnnecessaryCastingInspection */
                    return (string) self::chr(0xC0 | $cp >> 6) . (string) self::chr(0x80 | $cp & 0x3F);
                }

                return self::decimal_to_chr($cp);
            },
            $buf
        );

        // "preg_replace_callback()" returns null on error
        if ($buf === null) {
            return '';
        }

        // optionally decode html-entities
        if ($decodeHtmlEntityToUtf8 === true) {
            $buf = self::html_entity_decode($buf);
        }

        return $buf;
    }
11549
11550
    /**
     * Strip whitespace or other characters from beginning or end of a UTF-8 string.
     *
     * INFO: This is slower than "trim()"
     *
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
     * but the check for ASCII (7-Bit) costs more time than we can save here.
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars [optional] <p>Optional characters to be stripped; null or an empty string strips
     *                           whitespace.</p>
     *
     * @return string the trimmed string
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // compare explicitly: a plain truthiness check would treat the char-list "0"
        // as "not given" and strip whitespace instead of zeros
        if ($chars !== null && $chars !== '') {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = '^[\s]+|[\s]+$';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
11583
11584
    /**
     * Makes string's first char uppercase.
     *
     * The string is split into its first character and the rest; only the first character is
     * upper-cased. Without "$lang" and "$tryToKeepStringLength" the native "mb_strtoupper()" is
     * used, otherwise "UTF8::strtoupper()" handles the special cases.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // no language / length special cases requested => the plain "mb_" function is enough
        $plainMbCase = $lang === null && $tryToKeepStringLength === false;

        if ($encoding === 'UTF-8') {
            $head = (string) \mb_substr($str, 0, 1);
            $tail = (string) \mb_substr($str, 1);

            if ($plainMbCase === true) {
                return \mb_strtoupper($head) . $tail;
            }

            return self::strtoupper($head, $encoding, false, $lang, $tryToKeepStringLength) . $tail;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $tail = (string) self::substr($str, 1, null, $encoding);

        if ($plainMbCase === true) {
            $head = \mb_strtoupper(
                (string) \mb_substr($str, 0, 1, $encoding),
                $encoding
            );
        } else {
            $head = self::strtoupper(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $tryToKeepStringLength
            );
        }

        return $head . $tail;
    }
11653
11654
    /**
     * Same as "UTF8::ucfirst()", limited to its first three arguments.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Forwarded as the charset.</p>
     * @param bool   $cleanUtf8 [optional] <p>Forwarded as the "clean" flag.</p>
     *
     * @return string
     *
     * @see UTF8::ucfirst()
     */
    public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        return self::ucfirst($str, $encoding, $cleanUtf8);
    }
11669
11670
    /**
     * Uppercase for all words in the string.
     *
     * Pure ASCII input without "$charlist" and "$exceptions" is handled by the native
     * "ucwords()". Otherwise the string is split by "UTF8::str_to_words()", every piece that is
     * not listed in "$exceptions" goes through "UTF8::ucfirst()", and the pieces are joined
     * again without a separator.
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
     * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ): string {
        // compare explicitly: "!$str" would also swallow the string "0"
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // the native function is only an option if neither a charlist nor a non-empty
        // exception was given (a boolean cast would wrongly ignore the value "0")
        $usePhpDefaultFunctions = ($charlist . \implode('', $exceptions)) === '';

        if (
            $usePhpDefaultFunctions === true
            &&
            self::is_ascii($str) === true
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        foreach ($words as &$word) {
            if ($word === '') {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $word = self::ucfirst($word, $encoding);
            }
        }
        // break the reference left behind by the by-reference loop
        unset($word);

        return \implode('', $words);
    }
11731
11732
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                    => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * Strings without "&", "%", "+" and "\u" only go through "UTF8::fix_simple_utf8()". All other
     * strings are decoded in a loop (to_utf8 -> html_entity_decode -> urldecode -> fix_simple_utf8)
     * until a pass no longer changes the string, or only once if "$multi_decode" is false.
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // is there anything that could be an entity, a percent / plus encoding or a unicode escape?
        $needsDecoding = \strpos($str, '&') !== false
                         || \strpos($str, '%') !== false
                         || \strpos($str, '+') !== false
                         || \strpos($str, '\u') !== false;

        if ($needsDecoding === false) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        while (true) {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $entityDecoded = self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            );
            $str = self::fix_simple_utf8(\urldecode($entityDecoded));

            // stop after the first pass, or as soon as a pass changed nothing
            if ($multi_decode !== true || $previous === $str) {
                break;
            }
        }

        return $str;
    }
11789
11790
    /**
     * Return a array with "urlencoded"-win1252 -> UTF-8
     *
     * The keys are the upper-case percent-encodings "%20" - "%FF", the values are the matching
     * characters as UTF-8. "%80" maps to the euro sign, which is what byte 0x80 means in
     * Windows-1252 (the backtick is already covered by "%60").
     *
     * NOTE(review): "%7F", "%A0" and "%AD" are empty strings here, although Windows-1252 defines
     * DEL, NO-BREAK SPACE and SOFT HYPHEN for them - confirm that this is intended and that no
     * invisible characters got lost. "%81", "%8D", "%8F", "%90" and "%9D" are unassigned in
     * Windows-1252.
     *
     * @return string[]
     *
     * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
     */
    public static function urldecode_fix_win1252_chars(): array
    {
        return [
            '%20' => ' ',
            '%21' => '!',
            '%22' => '"',
            '%23' => '#',
            '%24' => '$',
            '%25' => '%',
            '%26' => '&',
            '%27' => "'",
            '%28' => '(',
            '%29' => ')',
            '%2A' => '*',
            '%2B' => '+',
            '%2C' => ',',
            '%2D' => '-',
            '%2E' => '.',
            '%2F' => '/',
            '%30' => '0',
            '%31' => '1',
            '%32' => '2',
            '%33' => '3',
            '%34' => '4',
            '%35' => '5',
            '%36' => '6',
            '%37' => '7',
            '%38' => '8',
            '%39' => '9',
            '%3A' => ':',
            '%3B' => ';',
            '%3C' => '<',
            '%3D' => '=',
            '%3E' => '>',
            '%3F' => '?',
            '%40' => '@',
            '%41' => 'A',
            '%42' => 'B',
            '%43' => 'C',
            '%44' => 'D',
            '%45' => 'E',
            '%46' => 'F',
            '%47' => 'G',
            '%48' => 'H',
            '%49' => 'I',
            '%4A' => 'J',
            '%4B' => 'K',
            '%4C' => 'L',
            '%4D' => 'M',
            '%4E' => 'N',
            '%4F' => 'O',
            '%50' => 'P',
            '%51' => 'Q',
            '%52' => 'R',
            '%53' => 'S',
            '%54' => 'T',
            '%55' => 'U',
            '%56' => 'V',
            '%57' => 'W',
            '%58' => 'X',
            '%59' => 'Y',
            '%5A' => 'Z',
            '%5B' => '[',
            '%5C' => '\\',
            '%5D' => ']',
            '%5E' => '^',
            '%5F' => '_',
            '%60' => '`',
            '%61' => 'a',
            '%62' => 'b',
            '%63' => 'c',
            '%64' => 'd',
            '%65' => 'e',
            '%66' => 'f',
            '%67' => 'g',
            '%68' => 'h',
            '%69' => 'i',
            '%6A' => 'j',
            '%6B' => 'k',
            '%6C' => 'l',
            '%6D' => 'm',
            '%6E' => 'n',
            '%6F' => 'o',
            '%70' => 'p',
            '%71' => 'q',
            '%72' => 'r',
            '%73' => 's',
            '%74' => 't',
            '%75' => 'u',
            '%76' => 'v',
            '%77' => 'w',
            '%78' => 'x',
            '%79' => 'y',
            '%7A' => 'z',
            '%7B' => '{',
            '%7C' => '|',
            '%7D' => '}',
            '%7E' => '~',
            '%7F' => '',
            '%80' => '€',
            '%81' => '',
            '%82' => '‚',
            '%83' => 'ƒ',
            '%84' => '„',
            '%85' => '…',
            '%86' => '†',
            '%87' => '‡',
            '%88' => 'ˆ',
            '%89' => '‰',
            '%8A' => 'Š',
            '%8B' => '‹',
            '%8C' => 'Œ',
            '%8D' => '',
            '%8E' => 'Ž',
            '%8F' => '',
            '%90' => '',
            '%91' => '‘',
            '%92' => '’',
            '%93' => '“',
            '%94' => '”',
            '%95' => '•',
            '%96' => '–',
            '%97' => '—',
            '%98' => '˜',
            '%99' => '™',
            '%9A' => 'š',
            '%9B' => '›',
            '%9C' => 'œ',
            '%9D' => '',
            '%9E' => 'ž',
            '%9F' => 'Ÿ',
            '%A0' => '',
            '%A1' => '¡',
            '%A2' => '¢',
            '%A3' => '£',
            '%A4' => '¤',
            '%A5' => '¥',
            '%A6' => '¦',
            '%A7' => '§',
            '%A8' => '¨',
            '%A9' => '©',
            '%AA' => 'ª',
            '%AB' => '«',
            '%AC' => '¬',
            '%AD' => '',
            '%AE' => '®',
            '%AF' => '¯',
            '%B0' => '°',
            '%B1' => '±',
            '%B2' => '²',
            '%B3' => '³',
            '%B4' => '´',
            '%B5' => 'µ',
            '%B6' => '¶',
            '%B7' => '·',
            '%B8' => '¸',
            '%B9' => '¹',
            '%BA' => 'º',
            '%BB' => '»',
            '%BC' => '¼',
            '%BD' => '½',
            '%BE' => '¾',
            '%BF' => '¿',
            '%C0' => 'À',
            '%C1' => 'Á',
            '%C2' => 'Â',
            '%C3' => 'Ã',
            '%C4' => 'Ä',
            '%C5' => 'Å',
            '%C6' => 'Æ',
            '%C7' => 'Ç',
            '%C8' => 'È',
            '%C9' => 'É',
            '%CA' => 'Ê',
            '%CB' => 'Ë',
            '%CC' => 'Ì',
            '%CD' => 'Í',
            '%CE' => 'Î',
            '%CF' => 'Ï',
            '%D0' => 'Ð',
            '%D1' => 'Ñ',
            '%D2' => 'Ò',
            '%D3' => 'Ó',
            '%D4' => 'Ô',
            '%D5' => 'Õ',
            '%D6' => 'Ö',
            '%D7' => '×',
            '%D8' => 'Ø',
            '%D9' => 'Ù',
            '%DA' => 'Ú',
            '%DB' => 'Û',
            '%DC' => 'Ü',
            '%DD' => 'Ý',
            '%DE' => 'Þ',
            '%DF' => 'ß',
            '%E0' => 'à',
            '%E1' => 'á',
            '%E2' => 'â',
            '%E3' => 'ã',
            '%E4' => 'ä',
            '%E5' => 'å',
            '%E6' => 'æ',
            '%E7' => 'ç',
            '%E8' => 'è',
            '%E9' => 'é',
            '%EA' => 'ê',
            '%EB' => 'ë',
            '%EC' => 'ì',
            '%ED' => 'í',
            '%EE' => 'î',
            '%EF' => 'ï',
            '%F0' => 'ð',
            '%F1' => 'ñ',
            '%F2' => 'ò',
            '%F3' => 'ó',
            '%F4' => 'ô',
            '%F5' => 'õ',
            '%F6' => 'ö',
            '%F7' => '÷',
            '%F8' => 'ø',
            '%F9' => 'ù',
            '%FA' => 'ú',
            '%FB' => 'û',
            '%FC' => 'ü',
            '%FD' => 'ý',
            '%FE' => 'þ',
            '%FF' => 'ÿ',
        ];
    }
12026
12027
    /**
     * Decodes an UTF-8 string to ISO-8859-1.
     *
     * The input is rewritten in place, byte by byte:
     * - 2-byte sequences (lead byte 0xC0-0xDF) are combined into one code point; code points
     *   below 256 are mapped via the "chr" table, everything else becomes "?"
     * - 3-byte and 4-byte sequences (lead byte 0xE0-0xFF) always become "?"
     * - every other byte is copied unchanged
     *
     * @param string $str           <p>The input string.</p>
     * @param bool   $keepUtf8Chars <p>
     *                              If true, the original input is returned whenever the decoded result
     *                              is not shorter than the input (both measured with self::strlen()).
     *                              </p>
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // save for later comparison
        $str_backup = $str;
        $len = \strlen($str);

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $noCharFound = '?';
        // $i reads the input, $j writes the (never longer) output into the same buffer
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    if ($i + 1 >= $len) {
                        // truncated 2-byte sequence at the end of the input:
                        // there is no continuation byte to read, so emit the placeholder
                        $str[$j] = $noCharFound;

                        break;
                    }

                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // 4-byte sequence: skip one byte here, the remaining two below
                    ++$i;

                // no break

                case "\xE0":
                    // 3-byte (or 4-byte) sequence: cannot be represented in ISO-8859-1
                    $str[$j] = $noCharFound;
                    $i += 2;

                    break;

                default:
                    $str[$j] = $str[$i];
            }
        }

        // only the first $j bytes of the buffer belong to the decoded result
        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keepUtf8Chars === true
            &&
            self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
12096
12097
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * Thin wrapper around the global utf8_encode() that short-circuits the empty
     * string and normalizes a "false" result to an empty string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $encoded = \utf8_encode($str);

        // the polyfill maybe return false
        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        return $encoded === false ? '' : $encoded;
    }
12121
12122
    /**
     * Fix "utf8-win1252" chars.
     *
     * Kept for backward compatibility only: the call is forwarded unchanged
     * to self::fix_simple_utf8().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
12135
12136
    /**
     * Returns an array with all utf8 whitespace characters.
     *
     * The table itself is stored in self::$WHITESPACE_TABLE.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @return string[]
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
     *                  as defined in above URL
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
12149
12150
    /**
     * Limit the number of words in a string.
     *
     * Leading whitespace is kept; when the string is cut, trailing whitespace of the
     * kept part is removed before $strAddOn is appended.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $limit    <p>The limit of words as integer; values below 1 yield an empty string.</p>
     * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
     *
     * @return string
     */
    public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
    {
        if ($limit < 1 || $str === '') {
            return '';
        }

        // optional leading whitespace, followed by 1..$limit "word + trailing whitespace" groups
        $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
        \preg_match($pattern, $str, $found);

        // nothing matched at all -> hand the input back untouched
        if (!isset($found[0])) {
            return $str;
        }

        $head = $found[0];

        // the match already covers the whole input, so there is nothing to cut off
        if ((int) \mb_strlen($head) === \mb_strlen($str)) {
            return $str;
        }

        return \rtrim($head) . $strAddOn;
    }
12177
12178
    /**
     * Wraps a string to a given number of characters
     *
     * The input is split into characters via self::str_split() and mirrored into a
     * one-byte-per-character "mask" string. The native wordwrap() runs on that mask,
     * and the break positions it produces are then replayed on the real characters.
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @return string
     *                <p>The given string wrapped at the specified column.</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($break === '' || $str === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }

        // $glyphs holds the real characters (an existing break counts as one entry),
        // $mask holds one byte per entry: "#" = break, " " = space, "?" = anything else
        $glyphs = [];
        $mask = '';
        foreach ($segments as $index => $segment) {
            if ($index !== 0) {
                $glyphs[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $glyph) {
                $glyphs[] = $glyph;
                $mask .= $glyph === ' ' ? ' ' : '?';
            }
        }

        // let the native function decide where the breaks go
        $mask = \wordwrap($mask, $width, '#', $cut);

        $wrapped = '';
        $glyphPos = 0;
        $maskPos = -1;
        $breakPos = -1;

        while (($breakPos = \mb_strpos($mask, '#', $breakPos + 1)) !== false) {
            // copy every character that sits in front of the current break marker
            ++$maskPos;
            while ($maskPos < $breakPos) {
                $wrapped .= $glyphs[$glyphPos];
                unset($glyphs[$glyphPos]);
                ++$glyphPos;
                ++$maskPos;
            }

            // the marker replaced either an original break or a space: drop that entry
            if (
                $glyphs[$glyphPos] === $break
                ||
                $glyphs[$glyphPos] === ' '
            ) {
                unset($glyphs[$glyphPos]);
                ++$glyphPos;
            }

            $wrapped .= $break;
        }

        // whatever is left belongs to the last line
        return $wrapped . \implode('', $glyphs);
    }
12248
12249
    /**
     * Line-Wrap the string after $limit, but also after the next word.
     *
     * The input is split on "\r\n", "\r" and "\n"; every line is wrapped on its own with the
     * multibyte-aware self::wordwrap() and re-joined with "\n". Every line, including the
     * last one, is terminated by "\n".
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $limit <p>The column width for every single line.</p>
     *
     * @return string
     */
    public static function wordwrap_per_line(string $str, int $limit): string
    {
        $lines = (array) \preg_split('/\\r\\n|\\r|\\n/', $str);

        $result = '';
        foreach ($lines as $line) {
            // preg_split() returns false on failure, which the array cast turns into [false]
            if ($line === false) {
                continue;
            }

            // self::wordwrap() counts characters, the native wordwrap() would count bytes
            $result .= self::wordwrap($line, $limit);
            $result .= "\n";
        }

        return $result;
    }
12273
12274
    /**
     * Returns an array of Unicode White Space characters.
     *
     * The list itself is stored in self::$WHITESPACE.
     *
     * @return string[] an array with numeric code point as key and White Space Character as value
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
12283
12284
    /**
     * Applies the case-folding replacement tables to a string.
     *
     * The "common" table (self::$COMMON_CASE_FOLD) is always applied; the "full" table
     * (data file "caseFolding_full") is loaded lazily and applied only on request.
     *
     * @param string $str
     * @param bool   $useLower     <p>Use uppercase by default, otherwise use lowercase.</p>
     * @param bool   $fullCaseFold <p>Convert not only common cases.</p>
     *
     * @return string
     */
    private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
    {
        $toLower = $useLower === true;

        // common case folding: pick the replacement direction first, then replace once
        if ($toLower) {
            $search = self::$COMMON_CASE_FOLD['upper'];
            $replace = self::$COMMON_CASE_FOLD['lower'];
        } else {
            $search = self::$COMMON_CASE_FOLD['lower'];
            $replace = self::$COMMON_CASE_FOLD['upper'];
        }

        $str = \str_replace($search, $replace, $str);

        if (!$fullCaseFold) {
            return $str;
        }

        // the full table is loaded from disk only once per request
        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        // index 0 is replaced by index 1 when lowercasing, and the other way round otherwise
        $fromIndex = $toLower ? 0 : 1;
        $toIndex = $toLower ? 1 : 0;

        return \str_replace($FULL_CASE_FOLD[$fromIndex], $FULL_CASE_FOLD[$toIndex], $str);
    }
12325
12326
    /**
     * get data from "/data/*.php"
     *
     * The file is included unconditionally and its return value is handed back.
     *
     * @param string $file <p>File name without directory and without the ".php" extension.</p>
     *
     * @return array
     */
    private static function getData(string $file): array
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
12340
12341
    /**
     * get data from "/data/*.php"
     *
     * Unlike getData(), the file is only included when it exists.
     *
     * @param string $file <p>File name without directory and without the ".php" extension.</p>
     *
     * @return false|mixed will return false on error
     */
    private static function getDataIfExists(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        if (!\file_exists($path)) {
            return false;
        }

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        return include $path;
    }
12359
12360
    /**
     * Lazily builds the emoji lookup caches.
     *
     * On the first call the emoji table is loaded (if needed), sorted by key length
     * (longest key first) and split into the key / value / reversible-key caches.
     *
     * @return true|null true when the caches were built by this call, null when they already existed
     */
    private static function initEmojiData()
    {
        // already initialized -> nothing to do
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        // longest keys first
        \uksort(
            self::$EMOJI,
            static function (string $left, string $right): int {
                return \strlen($right) <=> \strlen($left);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = \array_values(self::$EMOJI);

        // one placeholder per key, derived from the key's CRC32 checksum
        foreach (self::$EMOJI_KEYS_CACHE as $emojiKey) {
            $checksum = \crc32($emojiKey);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev((string) $checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
12390
12391
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * Returns false right away when the constant MB_OVERLOAD_STRING is not defined;
     * otherwise the MB_OVERLOAD_STRING bit of the INI value "mbstring.func_overload" decides.
     *
     * @return bool
     */
    private static function mbstring_overloaded(): bool
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */

        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $overloadFlags = (int) @\ini_get('mbstring.func_overload');

        /** @noinspection PhpComposerExtensionStubsInspection */
        return (bool) ($overloadFlags & \MB_OVERLOAD_STRING);
    }
12408
12409
    /**
     * Filters a list of strings and returns the remaining values re-indexed from 0.
     *
     * @param array $strings           <p>The strings to filter.</p>
     * @param bool  $removeEmptyValues <p>If true, values that are empty after trim() are dropped.</p>
     * @param int   $removeShortValues <p>
     *                                 If not null, values whose mb_strlen() is less than or equal to
     *                                 this number are dropped.
     *                                 </p>
     *
     * @return array
     */
    private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
    {
        // init
        $return = [];

        // iterate by value: the entries are only read, so no reference is needed
        foreach ($strings as $str) {
            if (
                $removeShortValues !== null
                &&
                \mb_strlen($str) <= $removeShortValues
            ) {
                continue;
            }

            if (
                $removeEmptyValues === true
                &&
                \trim($str) === ''
            ) {
                continue;
            }

            $return[] = $str;
        }

        return $return;
    }
12443
12444
    /**
     * rxClass
     *
     * Builds a regular-expression fragment that matches any single character of $s
     * (in addition to whatever $class already contains). Results are memoized per
     * "$s . $class" combination.
     *
     * @param string $s     <p>The characters to match.</p>
     * @param string $class <p>Raw content that is placed inside the character class as-is.</p>
     *
     * @return string
     */
    private static function rxClass(string $s, string $class = ''): string
    {
        static $RX_CLASS_CACHE = [];

        $cacheKey = $s . $class;

        if (isset($RX_CLASS_CACHE[$cacheKey])) {
            return $RX_CLASS_CACHE[$cacheKey];
        }

        // index 0 collects the character-class content, further entries are alternatives
        $classArray = [$class];

        // iterate by value with a dedicated variable: the result of a function call
        // cannot be used as a reference target and $s must not be overwritten
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a literal "-" has to come first inside a character class
                $classArray[0] = '-' . $classArray[0];
            } elseif (!isset($char[2])) {
                // at most two bytes long: escape regex meta characters
                $classArray[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                // one character of three or more bytes: can be used unescaped
                $classArray[0] .= $char;
            } else {
                // more than one character: cannot live in a character class
                $classArray[] = $char;
            }
        }

        // NOTE: plain truthiness check, so '' (and also '0') is left without brackets
        if ($classArray[0]) {
            $classArray[0] = '[' . $classArray[0] . ']';
        }

        if (\count($classArray) === 1) {
            $return = $classArray[0];
        } else {
            $return = '(?:' . \implode('|', $classArray) . ')';
        }

        $RX_CLASS_CACHE[$cacheKey] = $return;

        return $return;
    }
12492
12493
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * The input is split at $delimiter and every part gets its first character uppercased
     * via self::ucfirst(), except for:
     * - parts that are exactly one of the special-case names (e.g. "von", "de", "van")
     * - parts that start with one of the special-case prefixes (e.g. "mc", "d'")
     * - parts that start with one of the special-case names, when the delimiter is "-"
     *
     * @param string $names     <p>The names to capitalize.</p>
     * @param string $delimiter <p>The delimiter used to split and re-join the names.</p>
     * @param string $encoding  <p>The encoding that is passed on to self::strpos().</p>
     *
     * @return string
     */
    private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
    {
        // init
        $namesArray = \explode($delimiter, $names);

        if ($namesArray === false) {
            return '';
        }

        $specialCases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        foreach ($namesArray as &$name) {
            // exact special-case names stay as they are
            if (\in_array($name, $specialCases['names'], true)) {
                continue;
            }

            $continue = false;

            // for hyphenated names, the special-case names also count as prefixes
            if ($delimiter === '-') {
                foreach ($specialCases['names'] as $beginning) {
                    if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                        $continue = true;

                        break;
                    }
                }
            }

            if ($continue === false) {
                foreach ($specialCases['prefixes'] as $beginning) {
                    if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                        $continue = true;

                        break;
                    }
                }
            }

            if ($continue === true) {
                continue;
            }

            $name = self::ucfirst($name);
        }
        // break the reference, so that $name cannot modify the last array entry by accident
        unset($name);

        return \implode($delimiter, $namesArray);
    }
12583
12584
    /**
     * Generic case sensitive transformation for collation matching.
     *
     * The string is decomposed (Unicode normalization form D) and all
     * non-spacing marks (\p{Mn}) are removed afterwards.
     *
     * @param string $str <p>The input string</p>
     *
     * @return string|null an empty string if the input cannot be normalized,
     *                     null if preg_replace() fails
     */
    private static function strtonatfold(string $str)
    {
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        // normalize() does not return a string on failure (e.g. for invalid UTF-8),
        // and such a value must not be passed on to preg_replace()
        if (!\is_string($decomposed)) {
            return '';
        }

        return \preg_replace(
            '/\p{Mn}+/u',
            '',
            $decomposed
        );
    }
12599
12600
    /**
     * Converts a single byte into its UTF-8 representation.
     *
     * Bytes listed in the "win1252_to_utf8" table are replaced by their table entry;
     * every other byte is turned into a two-byte sequence built from its upper bits
     * (lead byte) and its lower six bits (continuation byte).
     *
     * @param int|string $input <p>The byte to convert; it is used as key of the "ord" table.</p>
     *
     * @return string
     */
    private static function to_utf8_convert_helper($input): string
    {
        // init
        $buf = '';

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // explicit integer cast: "/" yields a float, and a fractional float
            // must not be used as an array key (deprecated since PHP 8.1)
            $cc1 = self::$CHR[(int) ($ordC1 / 64)] | "\xC0";
            // the bitwise operators are intentional: they work byte-wise on the strings
            $cc2 = ((string) $input & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
12633
12634
    /**
     * Rewrites non-standard "%uXXXX" escapes (three or four hex digits) into
     * hexadecimal HTML entities ("&#xXXXX;").
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the input itself when it contains no such escape
     */
    private static function urldecode_unicode_helper(string $str): string
    {
        $pattern = '/%u([0-9a-fA-F]{3,4})/';

        // nothing to rewrite (or the match failed): return the input untouched
        if (!\preg_match($pattern, $str)) {
            return $str;
        }

        return (string) \preg_replace($pattern, '&#x\\1;', $str);
    }
12648
}
12649