Passed
Push — master ( 6b890c...bc2bd6 )
by Lars
04:16
created

UTF8   F

Complexity

Total Complexity 1697

Size/Duplication

Total Lines 12521
Duplicated Lines 0 %

Test Coverage

Coverage 79.75%

Importance

Changes 0
Metric Value
eloc 4347
dl 0
loc 12521
ccs 3032
cts 3802
cp 0.7975
rs 0.8
c 0
b 0
f 0
wmc 1697

293 Methods

Rating   Name   Duplication   Size   Complexity  
A callback() 0 3 1
A binary_to_str() 0 12 3
A bom() 0 3 1
A chr_to_decimal() 0 30 6
A file_has_bom() 0 8 2
A add_bom_to_string() 0 7 2
A filter_input() 0 13 2
A array_change_key_case() 0 20 5
A get_unique_string() 0 15 2
A is_bom() 0 10 3
A is_hexadecimal() 0 8 2
A encode_mimeheader() 0 25 5
A count_chars() 0 11 1
A ctype_loaded() 0 3 1
A has_uppercase() 0 8 2
A isBinary() 0 3 1
D chr() 0 101 18
A html_escape() 0 6 1
C get_file_type() 0 86 13
A chr_to_int() 0 3 1
C is_utf16() 0 65 16
A isHtml() 0 3 1
C filter() 0 54 13
A isBase64() 0 3 1
A is_html() 0 12 2
A decode_mimeheader() 0 15 5
A html_decode() 0 3 1
A isUtf32() 0 3 1
A chunk_split() 0 3 1
A is_alpha() 0 8 2
B get_random_string() 0 53 10
A fix_utf8() 0 30 4
A first_char() 0 11 4
A isUtf8() 0 3 1
A css_stripe_media_queries() 0 6 1
A clean() 0 46 6
A is_serialized() 0 11 3
A is_uppercase() 0 8 2
A is_ascii() 0 7 2
A is_blank() 0 8 2
D getCharDirection() 0 105 118
A htmlspecialchars() 0 11 3
A filter_var_array() 0 9 2
A __construct() 0 2 1
A decimal_to_chr() 0 3 1
B between() 0 48 8
A codepoints() 0 29 4
A chr_map() 0 5 1
A cleanup() 0 25 2
A char_at() 0 7 2
A chars() 0 3 1
B is_binary() 0 35 9
A intlChar_loaded() 0 3 1
A finfo_loaded() 0 3 1
A fits_inside() 0 3 1
A is_binary_file() 0 16 3
A intl_loaded() 0 3 1
A html_stripe_empty_tags() 0 6 1
A chr_size_list() 0 17 3
F extract_text() 0 175 34
A isBom() 0 3 1
A int_to_chr() 0 3 1
A is_lowercase() 0 8 2
A hasBom() 0 3 1
A iconv_loaded() 0 3 1
A isAscii() 0 3 1
A filter_var() 0 9 2
A is_empty() 0 3 1
B html_encode() 0 42 7
A isUtf16() 0 3 1
F encode() 0 139 37
C is_utf32() 0 65 16
A is_alphanumeric() 0 8 2
A fix_simple_utf8() 0 19 4
B checkForSupport() 0 51 6
B is_json() 0 27 8
A int_to_hex() 0 7 2
A has_lowercase() 0 8 2
A is_base64() 0 16 5
A hex_to_int() 0 14 3
A htmlentities() 0 23 3
A hex_to_chr() 0 3 1
A isJson() 0 3 1
A filter_input_array() 0 9 2
A getSupportInfo() 0 11 3
A chr_to_hex() 0 11 3
A collapse_whitespace() 0 8 2
A access() 0 11 4
B file_get_contents() 0 54 10
A emoji_decode() 0 16 2
D is_utf8() 0 144 31
A lcword() 0 8 1
A emoji_encode() 0 16 2
A lowerCaseFirst() 0 8 1
A lcfirst() 0 44 5
A json_loaded() 0 3 1
A lcwords() 0 31 6
A json_decode() 0 14 2
A json_encode() 0 10 2
A str_substr_after_first_separator() 0 28 6
A max() 0 14 3
B str_camelize() 0 70 10
A parse_str() 0 16 4
A str_contains() 0 10 2
B str_to_lines() 0 29 8
A substr_in_byte() 0 18 6
A strnatcasecmp() 0 5 1
A substr_left() 0 15 4
D strlen() 0 96 19
A str_isubstr_last() 0 20 4
A str_replace_beginning() 0 21 6
A remove_left() 0 21 4
B stripos() 0 59 11
A str_offset_exists() 0 10 2
D strrchr() 0 101 20
A to_filename() 0 24 2
A str_iends_with() 0 7 3
A max_chr_width() 0 8 2
F utf8_decode() 0 74 15
A ltrim() 0 19 4
A remove_html() 0 3 1
B str_longest_common_suffix() 0 51 10
C wordwrap() 0 51 12
B ucfirst() 0 57 7
A str_pad_both() 0 7 1
A str_index_last() 0 11 1
A str_substr_last() 0 33 6
A mbstring_loaded() 0 3 1
A str_limit() 0 26 6
A toUTF8() 0 3 1
A string() 0 10 1
C normalize_encoding() 0 134 14
B rxClass() 0 39 8
A str_ensure_right() 0 13 4
B str_titleize_for_humans() 0 152 5
A normalize_whitespace() 0 30 6
A str_starts_with() 0 3 1
A str_humanize() 0 15 1
C substr_count_in_byte() 0 54 15
A strchr() 0 8 1
A strichr() 0 8 1
A str_index_first() 0 11 1
A strlen_in_byte() 0 12 3
A str_ireplace_ending() 0 21 6
A rtrim() 0 19 4
C str_longest_common_substring() 0 73 16
A regex_replace() 0 20 3
A titlecase() 0 24 5
A getData() 0 6 1
A str_iindex_first() 0 11 1
B strtolower() 0 50 9
B urldecode() 0 44 9
A str_isubstr_before_first_separator() 0 16 5
B strrev() 0 43 10
A replace_all() 0 11 2
D substr_replace() 0 124 27
A removeBOM() 0 3 1
A strstr_in_byte() 0 12 4
A str_matches_pattern() 0 3 1
B str_titleize() 0 55 10
A ws() 0 3 1
A str_replace_first() 0 10 2
A toLatin1() 0 3 1
A str_pad_right() 0 7 1
B ucwords() 0 48 9
A to_boolean() 0 35 5
C stristr() 0 68 15
A strncasecmp() 0 10 1
B strwidth() 0 40 8
A trim() 0 19 4
A str_upper_camelize() 0 8 1
A substr_compare() 0 33 6
C substr_count() 0 62 16
A strnatcmp() 0 3 2
D str_pad() 0 146 16
A str_ireplace() 0 18 3
A to_latin1() 0 3 1
A str_replace_ending() 0 21 6
A string_has_bom() 0 10 3
B strtr() 0 33 8
A str_contains_all() 0 23 6
A normalize_line_ending() 0 3 1
A str_isubstr_after_last_separator() 0 23 5
B range() 0 41 10
B strspn() 0 30 10
A strcasecmp() 0 5 1
A str_transliterate() 0 3 1
B rawurldecode() 0 44 9
B str_capitalize_name_helper() 0 80 10
A utf8_encode() 0 32 6
A normalize_msword() 0 43 2
C str_detect_encoding() 0 111 13
A spaces_to_tabs() 0 11 3
A str_istarts_with() 0 7 3
A str_replace() 0 8 1
A substr_iright() 0 15 4
A replace() 0 11 2
A to_iso8859() 0 16 4
A words_limit() 0 17 5
A strip_tags() 0 15 4
A pcre_utf8_support() 0 4 1
A str_isubstr_before_last_separator() 0 21 6
D str_truncate_safe() 0 78 18
A substr_right() 0 31 6
D str_split() 0 125 28
A str_ends_with_any() 0 13 4
A strrpos_in_byte() 0 12 4
F strrpos() 0 118 26
A remove_right() 0 22 4
A remove_html_breaks() 0 3 1
A showSupport() 0 8 2
A remove_invisible_characters() 0 19 3
A single_chr_html_encode() 0 15 4
A str_replace_last() 0 12 2
A str_iindex_last() 0 11 1
A str_substr_before_last_separator() 0 31 6
B strtocasefold() 0 33 7
A tabs_to_spaces() 0 11 3
B str_truncate() 0 44 7
D strripos() 0 96 19
A strpos_in_byte() 0 12 4
A str_ends_with() 0 3 1
F to_ascii() 0 149 27
A reduce_string_array() 0 26 6
B str_longest_common_prefix() 0 48 8
A mbstring_overloaded() 0 11 2
A str_pad_left() 0 7 1
A str_substr_first() 0 33 6
A remove_bom() 0 21 5
A str_repeat() 0 5 1
A strpbrk() 0 11 4
A whitespace_table() 0 3 1
A substr_count_simple() 0 32 6
A str_iends_with_any() 0 13 4
A str_substr_after_last_separator() 0 28 6
A str_isubstr_after_first_separator() 0 23 5
B str_snakeize() 0 55 6
A str_sort() 0 15 3
D to_utf8() 0 117 35
A ucword() 0 3 1
A str_underscored() 0 3 1
A str_offset_get() 0 14 4
A strip_whitespace() 0 7 2
A getDataIfExists() 0 10 2
A toAscii() 0 3 1
A str_capitalize_name() 0 8 1
B str_limit_after_word() 0 53 11
A str_upper_first() 0 8 1
A normalizeEncoding() 0 3 1
A swapCase() 0 17 4
A substr_ileft() 0 15 4
A str_dasherize() 0 3 1
A str_ensure_left() 0 11 3
B urldecode_fix_win1252_chars() 0 227 1
A toIso8859() 0 3 1
C ord() 0 65 16
A strtonatfold() 0 3 1
C strcspn() 0 49 12
A fixStrCaseHelper() 0 33 5
B str_split_pattern() 0 42 9
D strstr() 0 92 18
A str_isubstr_first() 0 25 4
A str_last_char() 0 13 4
A str_ireplace_beginning() 0 21 6
A str_substr_before_first_separator() 0 32 6
F substr() 0 143 32
A wordwrap_per_line() 0 15 3
A str_surround() 0 3 1
A strncmp() 0 19 4
A str_insert() 0 28 4
A utf8_fix_win1252_chars() 0 3 1
A replace_diamond_question_mark() 0 35 5
A to_utf8_convert_helper() 0 27 5
B str_delimit() 0 33 8
B strtoupper() 0 50 9
A min() 0 14 3
C html_entity_decode() 0 86 17
A str_starts_with_any() 0 17 5
B strrichr() 0 54 11
A split() 0 6 1
A str_istarts_with_any() 0 17 5
A str_contains_any() 0 23 6
A initEmojiData() 0 20 4
A remove_duplicates() 0 14 4
B str_slice() 0 33 10
F strpos() 0 131 27
A str_shuffle() 0 35 6
A strcmp() 0 5 2
A str_word_count() 0 23 5
A strripos_in_byte() 0 12 4
A str_to_binary() 0 5 1
A symfony_polyfill_used() 0 16 5
B str_to_words() 0 36 8

How to fix   Complexity   

Complex Class

Complex classes like UTF8 often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use UTF8, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
final class UTF8
8
{
9
    // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
10
    // This regular expression is a work around for http://bugs.exim.org/1279
11
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
12
13
    /**
14
     * Bom => Byte-Length
15
     *
16
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
17
     *
18
     * @var array
19
     */
20
    private static $BOM = [
        "\xef\xbb\xbf"             => 3, // UTF-8 BOM
        // The three BOM bytes EF BB BF read as WINDOWS-1252 are the characters "ï", "»", "¿";
        // each of those takes two bytes in UTF-8, hence the byte-length of 6.
        "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
        "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
        // NOTE(review): the two non-letter characters in the UTF-32 "WINDOWS-1252" keys below are kept exactly
        // as found — confirm against the upstream file that they are the intended bytes.
        '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
        'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
        "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
        'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
        "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
        'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
    ];
32
33
    /**
34
     * Numeric code point => UTF-8 Character
35
     *
36
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
37
     *
38
     * @var array
39
     */
40
    private static $WHITESPACE = [
41
        // NUL Byte
42
        0 => "\x0",
43
        // Tab
44
        9 => "\x9",
45
        // New Line
46
        10 => "\xa",
47
        // Vertical Tab
48
        11 => "\xb",
49
        // Carriage Return
50
        13 => "\xd",
51
        // Ordinary Space
52
        32 => "\x20",
53
        // NO-BREAK SPACE
54
        160 => "\xc2\xa0",
55
        // OGHAM SPACE MARK
56
        5760 => "\xe1\x9a\x80",
57
        // MONGOLIAN VOWEL SEPARATOR
58
        6158 => "\xe1\xa0\x8e",
59
        // EN QUAD
60
        8192 => "\xe2\x80\x80",
61
        // EM QUAD
62
        8193 => "\xe2\x80\x81",
63
        // EN SPACE
64
        8194 => "\xe2\x80\x82",
65
        // EM SPACE
66
        8195 => "\xe2\x80\x83",
67
        // THREE-PER-EM SPACE
68
        8196 => "\xe2\x80\x84",
69
        // FOUR-PER-EM SPACE
70
        8197 => "\xe2\x80\x85",
71
        // SIX-PER-EM SPACE
72
        8198 => "\xe2\x80\x86",
73
        // FIGURE SPACE
74
        8199 => "\xe2\x80\x87",
75
        // PUNCTUATION SPACE
76
        8200 => "\xe2\x80\x88",
77
        // THIN SPACE
78
        8201 => "\xe2\x80\x89",
79
        //HAIR SPACE
80
        8202 => "\xe2\x80\x8a",
81
        // LINE SEPARATOR
82
        8232 => "\xe2\x80\xa8",
83
        // PARAGRAPH SEPARATOR
84
        8233 => "\xe2\x80\xa9",
85
        // NARROW NO-BREAK SPACE
86
        8239 => "\xe2\x80\xaf",
87
        // MEDIUM MATHEMATICAL SPACE
88
        8287 => "\xe2\x81\x9f",
89
        // IDEOGRAPHIC SPACE
90
        12288 => "\xe3\x80\x80",
91
    ];
92
93
    /**
94
     * @var array
95
     */
96
    private static $WHITESPACE_TABLE = [
97
        'SPACE'                     => "\x20",
98
        'NO-BREAK SPACE'            => "\xc2\xa0",
99
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
100
        'EN QUAD'                   => "\xe2\x80\x80",
101
        'EM QUAD'                   => "\xe2\x80\x81",
102
        'EN SPACE'                  => "\xe2\x80\x82",
103
        'EM SPACE'                  => "\xe2\x80\x83",
104
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
105
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
106
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
107
        'FIGURE SPACE'              => "\xe2\x80\x87",
108
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
109
        'THIN SPACE'                => "\xe2\x80\x89",
110
        'HAIR SPACE'                => "\xe2\x80\x8a",
111
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
112
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
113
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
114
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
115
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
116
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
117
    ];
118
119
    /**
120
     * bidirectional text chars
121
     *
122
     * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
123
     *
124
     * @var array
125
     */
126
    private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
127
        // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
128
        8234 => "\xE2\x80\xAA",
129
        // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
130
        8235 => "\xE2\x80\xAB",
131
        // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
132
        8236 => "\xE2\x80\xAC",
133
        // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
134
        8237 => "\xE2\x80\xAD",
135
        // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
136
        8238 => "\xE2\x80\xAE",
137
        // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
138
        8294 => "\xE2\x81\xA6",
139
        // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
140
        8295 => "\xE2\x81\xA7",
141
        // FIRST STRONG ISOLATE // (use -> dir = "auto")
142
        8296 => "\xE2\x81\xA8",
143
        // POP DIRECTIONAL ISOLATE
144
        8297 => "\xE2\x81\xA9",
145
    ];
146
147
    /**
148
     * @var array
149
     */
150
    private static $COMMON_CASE_FOLD = [
151
        'upper' => [
152
            'µ',
153
            'ſ',
154
            "\xCD\x85",
155
            'ς',
156
            'ẞ',
157
            "\xCF\x90",
158
            "\xCF\x91",
159
            "\xCF\x95",
160
            "\xCF\x96",
161
            "\xCF\xB0",
162
            "\xCF\xB1",
163
            "\xCF\xB5",
164
            "\xE1\xBA\x9B",
165
            "\xE1\xBE\xBE",
166
        ],
167
        'lower' => [
168
            'μ',
169
            's',
170
            'ι',
171
            'σ',
172
            'ß',
173
            'β',
174
            'θ',
175
            'φ',
176
            'π',
177
            'κ',
178
            'ρ',
179
            'ε',
180
            "\xE1\xB9\xA1",
181
            'ι',
182
        ],
183
    ];
184
185
    /**
186
     * @var array
187
     */
188
    private static $SUPPORT = [];
189
190
    /**
191
     * @var array|null
192
     */
193
    private static $BROKEN_UTF8_FIX;
194
195
    /**
196
     * @var array|null
197
     */
198
    private static $WIN1252_TO_UTF8;
199
200
    /**
201
     * @var array|null
202
     */
203
    private static $ENCODINGS;
204
205
    /**
206
     * @var array|null
207
     */
208
    private static $ORD;
209
210
    /**
211
     * @var array|null
212
     */
213
    private static $EMOJI;
214
215
    /**
216
     * @var array|null
217
     */
218
    private static $EMOJI_VALUES_CACHE;
219
220
    /**
221
     * @var array|null
222
     */
223
    private static $EMOJI_KEYS_CACHE;
224
225
    /**
226
     * @var array|null
227
     */
228
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
229
230
    /**
231
     * @var array|null
232
     */
233
    private static $CHR;
234
235
    /**
236
     * __construct()
237
     */
238 32
    public function __construct()
    {
        // Nothing to initialise: the constructor body is intentionally empty.
    }
241
242
    /**
243
     * Return the character at the specified position: $str[1] like functionality.
244
     *
245
     * @param string $str      <p>A UTF-8 string.</p>
246
     * @param int    $pos      <p>The position of character to return.</p>
247
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
248
     *
249
     * @return string single multi-byte character
250
     */
251 3
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        // An empty input or a negative position never yields a character.
        $nothingToRead = $pos < 0 || $str === '';
        if ($nothingToRead) {
            return '';
        }

        // Non-UTF-8 encodings go through the class' own substr(); UTF-8 uses mb_substr() directly.
        if ($encoding !== 'UTF-8') {
            return (string) self::substr($str, $pos, 1, $encoding);
        }

        return (string) \mb_substr($str, $pos, 1);
    }
263
264
    /**
265
     * Prepends UTF-8 BOM character to the string and returns the whole string.
266
     *
267
     * INFO: If BOM already existed there, the Input string is returned.
268
     *
269
     * @param string $str <p>The input string.</p>
270
     *
271
     * @return string the output string that contains BOM
272
     */
273 2
    public static function add_bom_to_string(string $str): string
    {
        // Leave the string alone when it already carries a BOM, otherwise prefix the UTF-8 one.
        $alreadyMarked = self::string_has_bom($str) !== false;

        return $alreadyMarked ? $str : self::bom() . $str;
    }
281
282
    /**
283
     * Changes all keys in an array.
284
     *
285
     * @param array  $array    <p>The array to work on</p>
286
     * @param int    $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
287
     *                         or <strong>CASE_LOWER</strong> (default)</p>
288
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
289
     *
290
     * @return array the input array with its keys lower- or uppercased (values are copied unchanged)
291
     */
292 2
    public static function array_change_key_case(array $array, int $case = \CASE_LOWER, string $encoding = 'UTF-8'): array
    {
        // Only CASE_UPPER selects upper-casing; every other value (including
        // invalid ones) falls back to CASE_LOWER.
        $toUpper = $case === \CASE_UPPER;

        $return = [];
        // Iterate by value: the values are only copied, so a by-reference loop
        // would just leave a dangling reference behind for no benefit.
        foreach ($array as $key => $value) {
            $newKey = $toUpper
                ? self::strtoupper((string) $key, $encoding)
                : self::strtolower((string) $key, $encoding);

            // Keys that become identical after case conversion overwrite each
            // other; the last one in iteration order wins.
            $return[$newKey] = $value;
        }

        return $return;
    }
313
314
    /**
315
     * Returns the substring between $start and $end, if found, or an empty
316
     * string. An optional offset may be supplied from which to begin the
317
     * search for the start string.
318
     *
319
     * @param string $str
320
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
321
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
322
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
323
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
324
     *
325
     * @return string
326
     */
327 16
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            // Generic path: normalise the encoding name and use the class' own helpers.
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $startAt = self::strpos($str, $start, $offset, $encoding);
            if ($startAt === false) {
                return '';
            }

            // First character after the start delimiter.
            $contentFrom = $startAt + (int) self::strlen($start, $encoding);
            $contentTo = self::strpos($str, $end, $contentFrom, $encoding);
            if ($contentTo === false || $contentTo === $contentFrom) {
                return '';
            }

            return (string) self::substr($str, $contentFrom, $contentTo - $contentFrom, $encoding);
        }

        // UTF-8 path: call the mb_* functions directly.
        $startAt = \mb_strpos($str, $start, $offset);
        if ($startAt === false) {
            return '';
        }

        // First character after the start delimiter.
        $contentFrom = $startAt + (int) \mb_strlen($start);
        $contentTo = \mb_strpos($str, $end, $contentFrom);
        if ($contentTo === false || $contentTo === $contentFrom) {
            // End delimiter missing, or nothing between the two delimiters.
            return '';
        }

        return (string) \mb_substr($str, $contentFrom, $contentTo - $contentFrom);
    }
377
378
    /**
379
     * Convert binary into a string.
380
     *
381
     * @param mixed $bin 1|0
382
     *
383
     * @return string
384
     */
385 2
    public static function binary_to_str($bin): string
    {
        // Empty input (or anything without a first offset) has nothing to convert.
        if (!isset($bin[0])) {
            return '';
        }

        // NOTE: base_convert() works on the whole number at once and, per the
        // PHP manual, may lose precision on large numbers.
        $convert = \base_convert($bin, 2, 16);
        if ($convert === '0') {
            return '';
        }

        // pack('H*') reads two hex digits per byte, high nibble first. With an
        // odd number of digits the last digit would become a HIGH nibble
        // ('a' => "\xa0"), so left-pad to an even length to keep the numeric
        // value ('a' => '0a' => "\x0a").
        if (\strlen($convert) % 2 !== 0) {
            $convert = '0' . $convert;
        }

        return \pack('H*', $convert);
    }
398
399
    /**
400
     * Returns the UTF-8 Byte Order Mark Character.
401
     *
402
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
403
     *
404
     * @return string UTF-8 Byte Order Mark
405
     */
406 4
    public static function bom(): string
    {
        // The three bytes EF BB BF.
        $utf8Bom = "\xef\xbb\xbf";

        return $utf8Bom;
    }
410
411
    /**
412
     * @alias of UTF8::chr_map()
413
     *
414
     * @see   UTF8::chr_map()
415
     *
416
     * @param array|string $callback
417
     * @param string       $str
418
     *
419
     * @return string[]
420
     */
421 2
    public static function callback($callback, string $str): array
    {
        // Pure alias: hand both arguments straight to chr_map().
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }
425
426
    /**
427
     * Returns the character at $index, with indexes starting at 0.
428
     *
429
     * @param string $str      <p>The input string.</p>
430
     * @param int    $index    <p>Position of the character.</p>
431
     * @param string $encoding [optional] <p>Default is UTF-8</p>
432
     *
433
     * @return string the character at $index
434
     */
435 9
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // UTF-8 uses mb_substr() directly; any other encoding goes through the class' own substr().
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $index, 1)
            : self::substr($str, $index, 1, $encoding);

        return (string) $char;
    }
443
444
    /**
445
     * Returns an array consisting of the characters in the string.
446
     *
447
     * @param string $str <p>The input string.</p>
448
     *
449
     * @return string[] an array of chars
450
     */
451 3
    public static function chars(string $str): array
    {
        // Delegates to str_split() with its default arguments.
        $characters = self::str_split($str);

        return $characters;
    }
455
456
    /**
457
     * This method will auto-detect your server environment for UTF-8 support.
458
     *
459
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
460
     */
461 5
    public static function checkForSupport()
    {
        // Only the first call performs the detection; later calls return immediately.
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        $hasMbstring = self::mbstring_loaded();
        self::$SUPPORT['mbstring'] = $hasMbstring;
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if ($hasMbstring === true) {
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        $hasIntl = self::intl_loaded();
        self::$SUPPORT['intl'] = $hasIntl;

        // The transliterator id list stays empty unless intl and the function are both available.
        $canListTransliterators = $hasIntl === true
                                  && \function_exists('transliterator_list_ids') === true;
        /** @noinspection PhpComposerExtensionStubsInspection */
        self::$SUPPORT['intl__transliterator_list_ids'] = $canListTransliterators
            ? \transliterator_list_ids()
            : [];

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        // When the polyfill is in use, set the internal encoding through it as well.
        $usesPolyfill = self::symfony_polyfill_used();
        self::$SUPPORT['symfony_polyfill_used'] = $usesPolyfill;
        if ($usesPolyfill === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }
    }
515
516
    /**
517
     * Generates a UTF-8 encoded character from the given code point.
518
     *
519
     * INFO: opposite to UTF8::ord()
520
     *
521
     * @param int|string $code_point <p>The code point for which to generate a character.</p>
522
     * @param string     $encoding   [optional] <p>Default is UTF-8</p>
523
     *
524
     * @return string|null multi-byte character, returns null on failure or empty input
525
     */
526 24
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // Memo of already generated characters, keyed by "<code point><encoding>".
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // Normalise the code point to an int up front. IntlChar::chr() treats a
        // string argument as a UTF-8 character rather than a number, so a
        // numeric string must not reach it uncast; otherwise the result would
        // depend on whether the intl extension is available.
        if (\is_string($code_point) === true) {
            if (\preg_match('/^[0-9]+$/D', $code_point) !== 1) {
                return null;
            }
            $code_point = (int) $code_point;
        } elseif (\is_float($code_point) === true && \is_finite($code_point) === true) {
            // Truncate like an array offset would.
            $code_point = (int) $code_point;
        } elseif (\is_int($code_point) === false) {
            return null;
        }

        // Outside U+0000..U+10FFFF there is no character to generate; the byte
        // table lookups below would run out of range.
        if ($code_point < 0 || $code_point > 0x10FFFF) {
            return null;
        }

        $cacheKey = $code_point . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        if ($code_point <= 0x7F) {
            // use "simple"-char only until "\x80": single byte, straight from the table
            if (self::$CHR === null) {
                self::$CHR = (array) self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];
        } elseif (self::$SUPPORT['intlChar'] === true) {
            //
            // fallback via "IntlChar"
            //

            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);
        } else {
            //
            // fallback via vanilla php: assemble the UTF-8 byte sequence by hand
            //

            if (self::$CHR === null) {
                self::$CHR = (array) self::getData('chr');
            }

            if ($code_point <= 0x7FF) {
                // 2 bytes: 110xxxxx 10xxxxxx
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            } elseif ($code_point <= 0xFFFF) {
                // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                       self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            } else {
                // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                /**
                 * @psalm-suppress PossiblyNullArrayAccess
                 */
                $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                       self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                       self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                       self::$CHR[($code_point & 0x3F) + 0x80];
            }
        }

        // The character was built as UTF-8; convert it when another encoding was requested.
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cacheKey] = $chr;
    }
628
629
    /**
630
     * Applies callback to all characters of a string.
631
     *
632
     * @param array|string $callback <p>The callback function.</p>
633
     * @param string       $str      <p>UTF-8 string to run callback on.</p>
634
     *
635
     * @return string[] the outcome of callback
636
     */
637 2
    public static function chr_map($callback, string $str): array
    {
        // Split first, then run the callback over every piece.
        $pieces = self::str_split($str);

        return \array_map($callback, $pieces);
    }
644
645
    /**
646
     * Generates an array of byte length of each character of a Unicode string.
647
     *
648
     * 1 byte => U+0000  - U+007F
649
     * 2 byte => U+0080  - U+07FF
650
     * 3 byte => U+0800  - U+FFFF
651
     * 4 byte => U+10000 - U+10FFFF
652
     *
653
     * @param string $str <p>The original unicode string.</p>
654
     *
655
     * @return int[] an array of byte lengths of each character
656
     */
657 4
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        // Default: measure each piece with the native strlen().
        $byteLength = '\strlen';

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            $byteLength = static function (string $data): int {
                return \mb_strlen($data, 'CP850'); // 8-BIT
            };
        }

        return \array_map($byteLength, self::str_split($str));
    }
675
676
    /**
677
     * Get a decimal code representation of a specific character.
678
     *
679
     * @param string $char <p>The input character.</p>
680
     *
681
     * @return int
682
     */
683 4
    public static function chr_to_decimal(string $char): int
    {
        $value = self::ord($char[0]);

        // 0xxxxxxx: high bit clear, the first byte already is the result.
        if (($value & 0x80) === 0) {
            return $value;
        }

        // The lead byte pattern decides the sequence length; its marker bits are stripped.
        $length = 1;
        if (($value & 0xe0) === 0xc0) {
            // 110xxxxx
            $length = 2;
            $value &= ~0xc0;
        } elseif (($value & 0xf0) === 0xe0) {
            // 1110xxxx
            $length = 3;
            $value &= ~0xe0;
        } elseif (($value & 0xf8) === 0xf0) {
            // 11110xxx
            $length = 4;
            $value &= ~0xf0;
        }

        // Fold in every following byte (10xxxxxx), six bits at a time.
        $position = 1;
        while ($position < $length) {
            $value = ($value << 6) + (self::ord($char[$position]) & ~0x80);
            ++$position;
        }

        return $value;
    }
714
715
    /**
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * An empty string yields an empty string. The literal entity "&#0;" is
     * handled like an empty character before it is passed to UTF8::ord().
     *
     * @param int|string $char <p>The input character</p>
     * @param string     $pfix [optional]
     *
     * @return string The code point encoded as U+xxxx
     */
    public static function chr_to_hex($char, string $pfix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        $character = $char === '&#0;' ? '' : (string) $char;
        $codePoint = self::ord($character);

        return self::int_to_hex($codePoint, $pfix);
    }
735
736
    /**
     * Alias of UTF8::chr_to_decimal(): returns the decimal code of a character.
     *
     * @see UTF8::chr_to_decimal()
     *
     * @param string $chr
     *
     * @return int
     */
    public static function chr_to_int(string $chr): int
    {
        $decimal = self::chr_to_decimal($chr);

        return $decimal;
    }
749
750
    /**
     * Splits a string into chunks of at most $chunklen characters and joins
     * them with the given line ending.
     *
     * The chunks are produced by UTF8::str_split() and glued with implode(),
     * so $end is placed between chunks and not after the last one.
     *
     * @param string $body     <p>The original string to be split.</p>
     * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
     * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
     *
     * @return string the chunked string
     */
    public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunklen);

        return \implode($end, $chunks);
    }
763
764
    /**
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
     *
     * The optional steps run in a fixed order after the byte-level clean-up:
     * diamond question mark removal, invisible character removal, whitespace
     * normalization, MS Word normalization and finally BOM removal.
     *
     * @param string $str                           <p>The string to be sanitized.</p>
     * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
     * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
     *                                              whitespace.</p>
     * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
     *                                              e.g.: "…"
     *                                              => "..."</p>
     * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
     *                                              combination with
     *                                              $normalize_whitespace</p>
     * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
     *                                              mark e.g.: "�"</p>
     * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove invisible
     *                                              characters e.g.: "\0"</p>
     *
     * @return string clean UTF-8 encoded string
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // Group 1 captures runs of well-formed sequences; groups 2 and 3 match
        // stray bytes. Replacing every match with "$1" keeps the former and
        // drops the latter.
        $utf8Pattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        $cleaned = (string) \preg_replace($utf8Pattern, '$1', $str);

        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned, '');
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        if ($remove_bom) {
            $cleaned = self::remove_bom($cleaned);
        }

        return $cleaned;
    }
831
832
    /**
     * Cleans up a string so that only printable UTF-8 chars remain and fixes
     * simple UTF-8 encoding errors.
     *
     * The input is cast to string, passed through UTF8::fix_simple_utf8() and
     * then through UTF8::clean() with a fixed set of options.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function cleanup($str): string
    {
        $input = (string) $str;

        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $repaired = self::fix_simple_utf8($input);

        return self::clean(
            $repaired,
            true,  // remove BOM
            true,  // normalize whitespace chars ...
            false, // do not normalize MS Word chars
            true,  // ... but keep non-breaking-spaces
            true,  // remove diamond question mark (�)
            true   // remove invisible characters (e.g. "\0")
        );
    }
866
867
    /**
     * Accepts a string or a array of strings and returns an array of Unicode code points.
     *
     * A string argument is split into characters first; every element is then
     * mapped through UTF8::ord() and, when requested, through UTF8::int_to_hex().
     *
     * INFO: opposite to UTF8::string()
     *
     * @param string|string[] $arg     <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $u_style <p>If True, will return code points in U+xxxx format,
     *                                 default, code points will be returned as integers.</p>
     *
     * @return array<int|string>
     *                           The array of code points:<br>
     *                           array<int> for $u_style === false<br>
     *                           array<string> for $u_style === true<br>
     */
    public static function codepoints($arg, bool $u_style = false): array
    {
        $characters = \is_string($arg) ? self::str_split($arg) : $arg;

        $codePoints = \array_map([self::class, 'ord'], $characters);

        if ($codePoints === []) {
            return [];
        }

        if (!$u_style) {
            return $codePoints;
        }

        return \array_map([self::class, 'int_to_hex'], $codePoints);
    }
911
912
    /**
     * Trims the string and replaces consecutive whitespace characters with a
     * single space.
     *
     * Whitespace is matched with the POSIX class "[[:space:]]", through
     * mb_ereg_replace() when mbstring is available and through
     * UTF8::regex_replace() otherwise. The outer trim() is PHP's plain trim().
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string with a trimmed $str and condensed whitespace
     */
    public static function collapse_whitespace(string $str): string
    {
        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $condensed = (string) \mb_ereg_replace('[[:space:]]+', ' ', $str);
        } else {
            $condensed = self::regex_replace($str, '[[:space:]]+', ' ');
        }

        return \trim($condensed);
    }
930
931
    /**
     * Returns count of characters used in a string.
     *
     * The string is split into single characters with UTF8::str_split() and
     * the occurrences are tallied with array_count_values().
     *
     * @param string $str                <p>The input string.</p>
     * @param bool   $cleanUtf8          [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool   $tryToUseMbFunction [optional] <p>Forwarded unchanged to UTF8::str_split();
     *                                   presumably set to false to avoid the "mb_" functions
     *                                   there (TODO: confirm against str_split()).</p>
     *
     * @return int[] an associative array of Character as keys and
     *               their count as values
     */
    public static function count_chars(
        string $str,
        bool $cleanUtf8 = false,
        bool $tryToUseMbFunction = true
    ): array {
        $characters = self::str_split($str, 1, $cleanUtf8, $tryToUseMbFunction);

        return \array_count_values($characters);
    }
955
956
    /**
     * Remove css media-queries.
     *
     * Every "@media ... { ... }" block matched by the pattern is replaced with
     * an empty string; if preg_replace() fails, an empty string is returned
     * (its null result is cast to string).
     *
     * @param string $str
     *
     * @return string
     */
    public static function css_stripe_media_queries(string $str): string
    {
        $mediaQueryPattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU';

        $stripped = \preg_replace($mediaQueryPattern, '', $str);

        return (string) $stripped;
    }
971
972
    /**
     * Checks whether ctype is available on the server.
     *
     * @return bool
     *              <strong>true</strong> if the "ctype" extension is loaded, <strong>false</strong> otherwise
     */
    public static function ctype_loaded(): bool
    {
        $isLoaded = \extension_loaded('ctype');

        return $isLoaded;
    }
982
983
    /**
     * Converts a int-value into an UTF-8 character.
     *
     * The value is wrapped into a numeric HTML entity ("&#<int>;") which is
     * then decoded with UTF8::html_entity_decode().
     *
     * @param mixed $int
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        $numericEntity = '&#' . $int . ';';

        return self::html_entity_decode($numericEntity, \ENT_QUOTES | \ENT_HTML5);
    }
994
995
    /**
     * Decodes a MIME header field
     *
     * Uses iconv_mime_decode() (continuing on errors) when iconv is available;
     * otherwise the input is re-encoded to $encoding if that is not UTF-8 and
     * passed to mb_decode_mimeheader().
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      A decoded MIME field on success,
     *                      or false if an error occurs during the decoding
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        // "UTF-8" and "CP850" are used as-is, anything else gets normalized
        if (\in_array($encoding, ['UTF-8', 'CP850'], true) === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if (self::$SUPPORT['iconv'] === true) {
            return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
        }

        $header = $encoding === 'UTF-8' ? $str : self::encode($encoding, $str);

        return \mb_decode_mimeheader($header);
    }
1021
1022
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  The different to "UTF8::utf8_encode()" is that this function, try to fix also broken / double encoding,
     *        so you can call this function also on a UTF-8 String and you don't mess the string.
     *
     * Besides real charsets, the pseudo encodings "JSON", "BASE64" and
     * "HTML-ENTITIES" are handled, both as target and as source.
     *
     * @param string $toEncoding             <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                    <p>The input string</p>
     * @param bool   $autodetectFromEncoding [optional] <p>Force the new encoding (we try to fix broken / double
     *                                       encoding for UTF-8)<br> otherwise we auto-detect the current
     *                                       string-encoding</p>
     * @param string $fromEncoding           [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                       A empty string will trigger the autodetect anyway.</p>
     *
     * @throws \InvalidArgumentException if the target is "JSON" and the string can not be JSON-encoded
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $toEncoding,
        string $str,
        bool $autodetectFromEncoding = true,
        string $fromEncoding = ''
    ): string {
        if ($str === '' || $toEncoding === '') {
            return $str;
        }

        // these two names are used as-is, everything else gets normalized
        $knownNames = ['UTF-8', 'CP850'];

        if (\in_array($toEncoding, $knownNames, true) === false) {
            $toEncoding = self::normalize_encoding($toEncoding, 'UTF-8');
        }

        if ($fromEncoding && \in_array($fromEncoding, $knownNames, true) === false) {
            $fromEncoding = self::normalize_encoding($fromEncoding, null);
        }

        // nothing to do if source and target are the same
        if ($toEncoding && $fromEncoding && $fromEncoding === $toEncoding) {
            return $str;
        }

        // --- pseudo encoding: JSON ---
        if ($toEncoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }
        if ($fromEncoding === 'JSON') {
            $str = self::json_decode($str);
            $fromEncoding = '';
        }

        // --- pseudo encoding: BASE64 ---
        if ($toEncoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($fromEncoding === 'BASE64') {
            $str = \base64_decode($str, true);
            $fromEncoding = '';
        }

        // --- pseudo encoding: HTML-ENTITIES ---
        if ($toEncoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true, 'UTF-8');
        }
        if ($fromEncoding === 'HTML-ENTITIES') {
            $str = self::html_decode($str, \ENT_COMPAT, 'UTF-8');
            $fromEncoding = '';
        }

        // detect the source encoding if requested or if none is known
        $detectedEncoding = ($autodetectFromEncoding || !$fromEncoding)
            ? self::str_detect_encoding($str)
            : false;

        // DEBUG
        //var_dump($toEncoding, $fromEncoding, $detectedEncoding, $str, "\n\n");

        if ($detectedEncoding !== false) {
            $fromEncoding = $detectedEncoding;
        } elseif ($autodetectFromEncoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (!$fromEncoding || $fromEncoding === $toEncoding) {
            return $str;
        }

        // dedicated converters for the common single-byte <-> UTF-8 cases
        if (
            $toEncoding === 'UTF-8'
            &&
            \in_array($fromEncoding, ['WINDOWS-1252', 'ISO-8859-1'], true)
        ) {
            return self::to_utf8($str);
        }

        if (
            $toEncoding === 'ISO-8859-1'
            &&
            \in_array($fromEncoding, ['WINDOWS-1252', 'UTF-8'], true)
        ) {
            return self::to_iso8859($str);
        }

        if (
            \in_array($toEncoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true) === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // warning: do not use the symfony polyfill here
            $converted = \mb_convert_encoding($str, $toEncoding, $fromEncoding);

            // a falsy result falls through to the iconv attempt below
            if ($converted) {
                return $converted;
            }
        }

        $viaIconv = \iconv($fromEncoding, $toEncoding, $str);

        // last resort: hand back the (possibly decoded) input unchanged
        return $viaIconv !== false ? $viaIconv : $str;
    }
1180
1181
    /**
     * Encodes a string as a MIME header value via iconv_mime_encode(),
     * using an empty field name.
     *
     * @param string $str
     * @param string $fromCharset      [optional] <p>Set the input charset.</p>
     * @param string $toCharset        [optional] <p>Set the output charset.</p>
     * @param string $transferEncoding [optional] <p>Set the transfer encoding.</p>
     * @param string $linefeed         [optional] <p>Set the used linefeed.</p>
     * @param int    $indent           [optional] <p>Set the max length indent
     *                                 (passed on as the "line-length" preference).</p>
     *
     * @return false|string
     *                      An encoded MIME field on success,
     *                      or false if an error occurs during the encoding
     */
    public static function encode_mimeheader(
        $str,
        $fromCharset = 'UTF-8',
        $toCharset = 'UTF-8',
        $transferEncoding = 'Q',
        $linefeed = "\r\n",
        $indent = 76
    ) {
        // "UTF-8" and "CP850" are used as-is, anything else gets normalized
        $knownNames = ['UTF-8', 'CP850'];

        if (\in_array($fromCharset, $knownNames, true) === false) {
            $fromCharset = self::normalize_encoding($fromCharset, 'UTF-8');
        }

        if (\in_array($toCharset, $knownNames, true) === false) {
            $toCharset = self::normalize_encoding($toCharset, 'UTF-8');
        }

        $preferences = [
            'scheme'           => $transferEncoding,
            'line-length'      => $indent,
            'input-charset'    => $fromCharset,
            'output-charset'   => $toCharset,
            'line-break-chars' => $linefeed,
        ];

        return \iconv_mime_encode('', $str, $preferences);
    }
1221
1222
    /**
     * Create an extract from a sentence, so if the search-string was found, it try to centered in the output.
     *
     * Cut positions are looked up as the next / previous " " or "." around the
     * computed offsets; skipped text is marked with $replacerForSkippedText.
     *
     * NOTE(review): the cut positions are taken as min() / max() over two
     * "strpos"-style results. When one of the two characters is not found the
     * lookup yields false, which min() appears to treat as the smaller value,
     * so the position ends up as 0 - confirm whether that is intended.
     *
     * @param string   $str                    <p>The input string.</p>
     * @param string   $search                 <p>The searched string.</p>
     * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
     * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacerForSkippedText = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if (\in_array($encoding, ['UTF-8', 'CP850'], true) === false) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2, 0);
        }

        // For UTF-8 the native "mb_" functions are called directly, for every
        // other encoding the class' own wrappers are used.
        $isUtf8 = $encoding === 'UTF-8';

        // character count of a string
        $lengthOf = static function (string $text) use ($isUtf8, $encoding): int {
            return (int) ($isUtf8 ? \mb_strlen($text) : self::strlen($text, $encoding));
        };

        // smaller one of the next " " / "." positions at or after $offset
        $nextBreak = static function (int $offset) use ($str, $isUtf8, $encoding): int {
            if ($isUtf8) {
                return (int) \min(
                    \mb_strpos($str, ' ', $offset),
                    \mb_strpos($str, '.', $offset)
                );
            }

            return (int) \min(
                self::strpos($str, ' ', $offset, $encoding),
                self::strpos($str, '.', $offset, $encoding)
            );
        };

        // part of the input string
        $slice = static function (int $start, int $count) use ($str, $isUtf8, $encoding) {
            return $isUtf8
                ? \mb_substr($str, $start, $count)
                : self::substr($str, $start, $count, $encoding);
        };

        // --- no search term: cut after roughly $length characters ---
        if ($search === '') {
            $end = $length > 0 ? \min($length - 1, $lengthOf($str)) : 0;

            $pos = $nextBreak($end);
            if (!$pos) {
                return $str;
            }

            $strSub = $slice(0, $pos);
            if ($strSub === false) {
                return '';
            }

            return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
        }

        // --- search term given: try to center the extract around it ---
        $wordPos = (int) ($isUtf8 ? \mb_stripos($str, $search) : self::stripos($str, $search, 0, $encoding));
        $halfSide = (int) ($wordPos - $length / 2 + $lengthOf($search) / 2);

        $pos_start = 0;
        if ($halfSide > 0) {
            $halfText = $slice(0, $halfSide);
            if ($halfText !== false) {
                // last " " / "." before the centered window
                if ($isUtf8) {
                    $pos_start = (int) \max(
                        \mb_strrpos($halfText, ' '),
                        \mb_strrpos($halfText, '.')
                    );
                } else {
                    $pos_start = (int) \max(
                        self::strrpos($halfText, ' ', 0, $encoding),
                        self::strrpos($halfText, '.', 0, $encoding)
                    );
                }
            }
        }

        if ($wordPos && $halfSide > 0) {
            // extract from the middle of the text
            $offset = \min($pos_start + $length - 1, (int) self::strlen($str, $encoding));
            $pos_end = $nextBreak($offset) - $pos_start;

            if ($pos_end <= 0) {
                // no usable end position: take everything up to the end
                $strSub = $slice($pos_start, $lengthOf($str));

                return $strSub !== false
                    ? $replacerForSkippedText . \ltrim($strSub, $trimChars)
                    : '';
            }

            $strSub = $slice($pos_start, $pos_end);

            return $strSub !== false
                ? $replacerForSkippedText . \trim($strSub, $trimChars) . $replacerForSkippedText
                : '';
        }

        // extract from the start of the text
        $offset = \min($length - 1, (int) self::strlen($str, $encoding));
        $pos_end = $nextBreak($offset);

        if (!$pos_end) {
            return $str;
        }

        $strSub = $slice(0, $pos_end);

        return $strSub !== false
            ? \rtrim($strSub, $trimChars) . $replacerForSkippedText
            : '';
    }
1409
1410
    /**
     * Reads entire file into a string.
     *
     * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read. The name is passed through
     *                                        filter_var() with FILTER_SANITIZE_STRING before use.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Passed on to file_get_contents(); set to true to
     *                                        trigger include path search.
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with
     *                                        stream_context_create. If null and $timeout is non-zero,
     *                                        a context with an "http" timeout is created.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts (null === 0).
     *                                        </p>
     * @param int|null      $maxLength        [optional] <p>
     *                                        Maximum length of data read. The default is to read until end
     *                                        of file is reached.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout.</p>
     * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Binary files
     *                                        like images or pdf will not be converted.</p>
     * @param string        $fromEncoding     [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        A empty string will trigger the autodetect anyway.</p>
     *
     * @return false|string the function returns the read data or false on failure
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $maxLength = null,
        int $timeout = 10,
        bool $convertToUtf8 = true,
        string $fromEncoding = ''
    ) {
        // init
        // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1.
        $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);

        // filter_var() reports failure with false; do not hand that to
        // file_get_contents() but report the failure to the caller directly.
        if ($filename === false) {
            return false;
        }

        if ($timeout && $context === null) {
            // the "timeout" option lives in the "http" wrapper section of the context
            $context = \stream_context_create(
                [
                    'http' => [
                        'timeout' => $timeout,
                    ],
                ]
            );
        }

        if ($offset === null) {
            $offset = 0;
        }

        // only pass the length argument when the caller actually limited it
        if (\is_int($maxLength) === true) {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength);
        } else {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset);
        }

        // return false on error
        if ($data === false) {
            return false;
        }

        if ($convertToUtf8 === true) {
            // binary data that is neither UTF-16 nor UTF-32 is left untouched
            $isPlainBinary = self::is_binary($data, true) === true
                             &&
                             self::is_utf16($data, false) === false
                             &&
                             self::is_utf32($data, false) === false;

            if ($isPlainBinary === false) {
                $data = self::encode('UTF-8', $data, false, $fromEncoding);
                $data = self::cleanup($data);
            }
        }

        return $data;
    }
1503
1504
    /**
     * Tells whether the content of a file begins with a BOM (Byte Order Mark).
     *
     * @param string $file_path <p>Path of the file to inspect.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <strong>true</strong> when the file starts with a BOM, <strong>false</strong> otherwise
     */
    public static function file_has_bom(string $file_path): bool
    {
        $content = \file_get_contents($file_path);

        if ($content !== false) {
            return self::string_has_bom($content);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1523
1524
    /**
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Arrays and objects are walked recursively, strings are normalized and
     * every other type is returned unchanged.
     *
     * @param mixed  $var                <p>The value to normalize.</p>
     * @param int    $normalization_form <p>The form passed to the "Normalizer"-Class.</p>
     * @param string $leading_combining  <p>Prefix used when the result starts with a combining mark.</p>
     *
     * @return mixed
     */
    public static function filter($var, int $normalization_form = \Normalizer::NFC, string $leading_combining = '◌')
    {
        $type = \gettype($var);

        // containers: filter every element in place
        if ($type === 'array' || $type === 'object') {
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            unset($item);

            return $var;
        }

        if ($type !== 'string') {
            return $var;
        }

        if (\strpos($var, "\r") !== false) {
            // Workaround https://bugs.php.net/65732
            $var = self::normalize_line_ending($var);
        }

        // pure ASCII needs no normalization
        if (self::is_ascii($var) !== false) {
            return $var;
        }

        if (\Normalizer::isNormalized($var, $normalization_form)) {
            // placeholder: only marks that a usable normalization result exists
            $normalized = '-';
        } else {
            $normalized = \Normalizer::normalize($var, $normalization_form);

            // fall back to a re-encode when the normalizer gave nothing usable
            $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var, true);
        }

        if (
            $var[0] >= "\x80"
            &&
            isset($normalized[0], $leading_combining[0])
            &&
            \preg_match('/^\p{Mn}/u', $var)
        ) {
            // Prevent leading combining chars
            // for NFC-safe concatenations.
            $var = $leading_combining . $var;
        }

        return $var;
    }
1588
1589
    /**
1590
     * "filter_input()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1591
     *
1592
     * Gets a specific external variable by name and optionally filters it
1593
     *
1594
     * @see  http://php.net/manual/en/function.filter-input.php
1595
     *
1596
     * @param int    $type          <p>
1597
     *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1598
     *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1599
     *                              <b>INPUT_ENV</b>.
1600
     *                              </p>
1601
     * @param string $variable_name <p>
1602
     *                              Name of a variable to get.
1603
     *                              </p>
1604
     * @param int    $filter        [optional] <p>
1605
     *                              The ID of the filter to apply. The
1606
     *                              manual page lists the available filters.
1607
     *                              </p>
1608
     * @param mixed  $options       [optional] <p>
1609
     *                              Associative array of options or bitwise disjunction of flags. If filter
1610
     *                              accepts options, flags can be provided in "flags" field of array.
1611
     *                              </p>
1612
     *
1613
     * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
1614
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1615
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1616
     */
1617
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        // Hand $options to the native function only if the caller really passed it,
        // otherwise the native default stays in effect.
        $raw = \func_num_args() >= 4
            ? \filter_input($type, $variable_name, $filter, $options)
            : \filter_input($type, $variable_name, $filter);

        return self::filter($raw);
    }
1631
1632
    /**
1633
     * "filter_input_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1634
     *
1635
     * Gets external variables and optionally filters them
1636
     *
1637
     * @see  http://php.net/manual/en/function.filter-input-array.php
1638
     *
1639
     * @param int   $type       <p>
1640
     *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1641
     *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1642
     *                          <b>INPUT_ENV</b>.
1643
     *                          </p>
1644
     * @param mixed $definition [optional] <p>
1645
     *                          An array defining the arguments. A valid key is a string
1646
     *                          containing a variable name and a valid value is either a filter type, or an array
1647
     *                          optionally specifying the filter, flags and options. If the value is an
1648
     *                          array, valid keys are filter which specifies the
1649
     *                          filter type,
1650
     *                          flags which specifies any flags that apply to the
1651
     *                          filter, and options which specifies any options that
1652
     *                          apply to the filter. See the example below for a better understanding.
1653
     *                          </p>
1654
     *                          <p>
1655
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1656
     *                          input array are filtered by this filter.
1657
     *                          </p>
1658
     * @param bool  $add_empty  [optional] <p>
1659
     *                          Add missing keys as <b>NULL</b> to the return value.
1660
     *                          </p>
1661
     *
1662
     * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1663
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1664
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
1665
     *               is not set and <b>NULL</b> if the filter fails.
1666
     */
1667
    public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
    {
        // With a single argument the native defaults are used; otherwise
        // both optional arguments are forwarded as given.
        $values = \func_num_args() >= 2
            ? \filter_input_array($type, $definition, $add_empty)
            : \filter_input_array($type);

        return self::filter($values);
    }
1677
1678
    /**
1679
     * "filter_var()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1680
     *
1681
     * Filters a variable with a specified filter
1682
     *
1683
     * @see  http://php.net/manual/en/function.filter-var.php
1684
     *
1685
     * @param mixed $variable <p>
1686
     *                        Value to filter.
1687
     *                        </p>
1688
     * @param int   $filter   [optional] <p>
1689
     *                        The ID of the filter to apply. The
1690
     *                        manual page lists the available filters.
1691
     *                        </p>
1692
     * @param mixed $options  [optional] <p>
1693
     *                        Associative array of options or bitwise disjunction of flags. If filter
1694
     *                        accepts options, flags can be provided in "flags" field of array. For
1695
     *                        the "callback" filter, callable type should be passed. The
1696
     *                        callback must accept one argument, the value to be filtered, and return
1697
     *                        the value after filtering/sanitizing it.
1698
     *                        </p>
1699
     *                        <p>
1700
     *                        <code>
1701
     *                        // for filters that accept options, use this format
1702
     *                        $options = array(
1703
     *                        'options' => array(
1704
     *                        'default' => 3, // value to return if the filter fails
1705
     *                        // other options here
1706
     *                        'min_range' => 0
1707
     *                        ),
1708
     *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1709
     *                        );
1710
     *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1711
     *                        // for filter that only accept flags, you can pass them directly
1712
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1713
     *                        // for filter that only accept flags, you can also pass as an array
1714
     *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1715
     *                        array('flags' => FILTER_NULL_ON_FAILURE));
1716
     *                        // callback validate filter
1717
     *                        function foo($value)
1718
     *                        {
1719
     *                        // Expected format: Surname, GivenNames
1720
     *                        if (strpos($value, ", ") === false) return false;
1721
     *                        list($surname, $givennames) = explode(", ", $value, 2);
1722
     *                        $empty = (empty($surname) || empty($givennames));
1723
     *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1724
     *                        if ($empty || $notstrings) {
1725
     *                        return false;
1726
     *                        } else {
1727
     *                        return $value;
1728
     *                        }
1729
     *                        }
1730
     *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1731
     *                        </code>
1732
     *                        </p>
1733
     *
1734
     * @return mixed the filtered data, or <b>FALSE</b> if the filter fails
1735
     */
1736 2
    public static function filter_var($variable, int $filter = \FILTER_DEFAULT, $options = null)
    {
        // Forward $options only when the caller supplied a third argument.
        $filtered = \func_num_args() >= 3
            ? \filter_var($variable, $filter, $options)
            : \filter_var($variable, $filter);

        return self::filter($filtered);
    }
1746
1747
    /**
1748
     * "filter_var_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1749
     *
1750
     * Gets multiple variables and optionally filters them
1751
     *
1752
     * @see  http://php.net/manual/en/function.filter-var-array.php
1753
     *
1754
     * @param array $data       <p>
1755
     *                          An array with string keys containing the data to filter.
1756
     *                          </p>
1757
     * @param mixed $definition [optional] <p>
1758
     *                          An array defining the arguments. A valid key is a string
1759
     *                          containing a variable name and a valid value is either a
1760
     *                          filter type, or an
1761
     *                          array optionally specifying the filter, flags and options.
1762
     *                          If the value is an array, valid keys are filter
1763
     *                          which specifies the filter type,
1764
     *                          flags which specifies any flags that apply to the
1765
     *                          filter, and options which specifies any options that
1766
     *                          apply to the filter. See the example below for a better understanding.
1767
     *                          </p>
1768
     *                          <p>
1769
     *                          This parameter can be also an integer holding a filter constant. Then all values in the
1770
     *                          input array are filtered by this filter.
1771
     *                          </p>
1772
     * @param bool  $add_empty  [optional] <p>
1773
     *                          Add missing keys as <b>NULL</b> to the return value.
1774
     *                          </p>
1775
     *
1776
     * @return mixed an array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
1777
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
1778
     *               set
1779
     */
1780 2
    public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
    {
        // With a single argument the native defaults are used; otherwise
        // both optional arguments are forwarded as given.
        $values = \func_num_args() >= 2
            ? \filter_var_array($data, $definition, $add_empty)
            : \filter_var_array($data);

        return self::filter($values);
    }
1790
1791
    /**
     * Reports whether the "finfo" class exists on this server.
     *
     * @return bool
     *              <strong>true</strong> if the class exists, <strong>false</strong> otherwise
     */
    public static function finfo_loaded(): bool
    {
        $finfoAvailable = \class_exists('finfo');

        return $finfoAvailable;
    }
1801
1802
    /**
     * Returns the first $n characters of the string.
     *
     * An empty input or a non-positive $n yields an empty string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $n <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // Name the charset explicitly, so the result does not depend on the
            // process-wide mb_internal_encoding() setting.
            return (string) \mb_substr($str, 0, $n, 'UTF-8');
        }

        return (string) self::substr($str, 0, $n, $encoding);
    }
1823
1824
    /**
     * Checks that the string has no more characters than the given box size.
     *
     * @param string $str      the original string to be checked
     * @param int    $box_size the maximum number of chars the string may have
     *
     * @return bool true if the string length is less than or equal to $box_size, false otherwise
     */
    public static function fits_inside(string $str, int $box_size): bool
    {
        $charCount = self::strlen($str);

        return $charCount <= $box_size;
    }
1836
1837
    /**
     * Try to fix simple broken UTF-8 strings.
     *
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
     *
     * Use this function for an UTF-8 string that was converted from Windows-1252 as if it
     * was ISO-8859-1 (ignoring the Windows-1252 chars from 80 to 9F).
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * @param string $str <p>The input string</p>
     *
     * @return string
     */
    public static function fix_simple_utf8(string $str): string
    {
        // search / replace lists, built once per request from the "utf8_fix" data
        static $search = null;
        static $replace = null;

        if ($str === '') {
            return '';
        }

        if ($search === null) {
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $search = \array_keys(self::$BROKEN_UTF8_FIX);
            $replace = \array_values(self::$BROKEN_UTF8_FIX);
        }

        return \str_replace($search, $replace, $str);
    }
1870
1871
    /**
     * Fix a double (or multiple) encoded UTF8 string.
     *
     * Arrays are processed element by element (recursively); a string is decoded
     * and re-encoded until the result no longer changes.
     *
     * @param string|string[] $str you can use a string or an array of strings
     *
     * @return string|string[]
     *                         Will return the fixed input-"array" or
     *                         the fixed input-"string"
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function fix_utf8($str)
    {
        if (\is_array($str) === true) {
            foreach ($str as &$entry) {
                $entry = self::fix_utf8($entry);
            }
            unset($entry);

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $current = (string) $str;
        $previous = '';

        // repeat the decode / encode round-trip until a pass changes nothing
        while ($previous !== $current) {
            $previous = $current;
            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $current = self::to_utf8(
                self::utf8_decode($current, true)
            );
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }
1913
1914
    /**
     * Get the text direction of a specific character.
     *
     * When "IntlChar" support is flagged, its direction code is used first; if that
     * code is in neither list below, the built-in code point table decides.
     *
     * @param string $char
     *
     * @return string 'RTL' or 'LTR'
     */
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $directionCode = \IntlChar::charDirection($char);

            // direction codes from the "IntlChar"-Class
            $codesByDirection = [
                'LTR' => [0, 11, 12, 20],
                'RTL' => [1, 13, 14, 15, 21],
            ];

            foreach ($codesByDirection as $direction => $codes) {
                if (\in_array($directionCode, $codes, true)) {
                    return $direction;
                }
            }
        }

        $c = static::chr_to_decimal($char);

        // everything outside of this window is treated as left-to-right
        if (!($c >= 0x5be && $c <= 0x10b7f)) {
            return 'LTR';
        }

        // single right-to-left code points
        $rtlCodePoints = [
            0x5be, 0x5c0, 0x5c3, 0x5c6,
            0x608, 0x60b, 0x60d, 0x61b,
            0x710, 0x7b1, 0x7fa,
            0x81a, 0x824, 0x828, 0x85e,
            0x200f,
            0xfb1d, 0xfb3e,
            0x10808, 0x1083c, 0x1093f, 0x10a00,
        ];

        if (\in_array($c, $rtlCodePoints, true)) {
            return 'RTL';
        }

        // inclusive right-to-left ranges: [first, last]
        $rtlRanges = [
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            [0x800, 0x815],
            [0x830, 0x83e],
            [0x840, 0x858],
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            [0x10b78, 0x10b7f],
        ];

        foreach ($rtlRanges as $range) {
            if ($c >= $range[0] && $c <= $range[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2027
2028
    /**
     * Check for php-support.
     *
     * @param string|null $key <p>Name of a single support entry, or null for all of them.</p>
     *
     * @return mixed
     *               Return the full support-"array", if $key === null<br>
     *               return bool-value, if $key is used and available<br>
     *               otherwise return <strong>null</strong>
     */
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            // unknown (or null-valued) entries yield null
            return self::$SUPPORT[$key] ?? null;
        }

        return self::$SUPPORT;
    }
2050
2051
    /**
     * Detects the file type from the first two bytes of the given data.
     *
     * Warning: this method only works for some file-types (png bmp gif jpg rar zip midi exe pdf)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * The raw two-byte signature is compared directly. Concatenating the decimal
     * values of both bytes would be ambiguous (e.g. 7 . 173 and 71 . 73 both
     * give "7173").
     *
     * @param string $str      <p>The (binary) data, only the first two bytes are inspected.</p>
     * @param array  $fallback with this keys: 'ext', 'mime', 'type'
     *
     * @return array
     *               with this keys: 'ext', 'mime', 'type';
     *               $fallback is returned for data shorter than two bytes or with an unknown signature
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        // known two-byte signatures => [extension, mime-type]
        static $signatures = [
            '%P'       => ['pdf', 'application/pdf'],
            'MZ'       => ['exe', 'application/octet-stream'],
            'MT'       => ['midi', 'audio/x-midi'],
            'PK'       => ['zip', 'application/zip'],
            'Ra'       => ['rar', 'application/rar'],
            "\xFF\xD8" => ['jpg', 'image/jpeg'],
            'GI'       => ['gif', 'image/gif'],
            'BM'       => ['bmp', 'image/bmp'],
            "\x89P"    => ['png', 'image/png'],
        ];

        // covers the empty string as well as a single byte
        if (\strlen($str) < 2) {
            return $fallback;
        }

        $signature = \substr($str, 0, 2);
        if (!isset($signatures[$signature])) {
            return $fallback;
        }

        return [
            'ext'  => $signatures[$signature][0],
            'mime' => $signatures[$signature][1],
            'type' => 'binary',
        ];
    }
2149
2150
    /**
     * Builds a random string of $length characters, picked from $possibleChars.
     *
     * An empty string is returned if $possibleChars is empty or $length is not positive.
     *
     * @param int    $length        <p>Length of the random string.</p>
     * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
    {
        // The "mb_" fast path is used for 'UTF-8' only. The charset is named explicitly
        // there, so the result does not depend on the global mb_internal_encoding().
        $useMbFunctions = $encoding === 'UTF-8';

        if ($useMbFunctions) {
            $maxlength = (int) \mb_strlen($possibleChars, 'UTF-8');
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $maxlength = (int) self::strlen($possibleChars, $encoding);
        }

        if ($maxlength === 0) {
            return '';
        }

        // init
        $i = 0;
        $str = '';

        //
        // add random chars
        //

        while ($i < $length) {
            try {
                $randInt = \random_int(0, $maxlength - 1);
            } catch (\Exception $e) {
                // best-effort fallback if random_int() throws
                /** @noinspection RandomApiMigrationInspection */
                $randInt = \mt_rand(0, $maxlength - 1);
            }

            if ($useMbFunctions) {
                $char = \mb_substr($possibleChars, $randInt, 1, 'UTF-8');
            } else {
                $char = self::substr($possibleChars, $randInt, 1, $encoding);
            }

            // count only the characters that could really be extracted
            if ($char !== false) {
                $str .= $char;
                ++$i;
            }
        }

        return $str;
    }
2211
2212
    /**
     * Builds a unique string from a random number, the session id, the server /
     * remote address (if set), the extra entropy and "uniqid()".
     *
     * @param int|string $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     */
    public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
    {
        try {
            $randInt = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            // same best-effort fallback as in "UTF8::get_random_string()"
            /** @noinspection RandomApiMigrationInspection */
            $randInt = \mt_rand(0, \mt_getrandmax());
        }

        $uniqueHelper = $randInt .
                        \session_id() .
                        ($_SERVER['REMOTE_ADDR'] ?? '') .
                        ($_SERVER['SERVER_ADDR'] ?? '') .
                        $entropyExtra;

        $uniqueString = \uniqid($uniqueHelper, true);

        if ($md5) {
            // hash the result, so that a fixed-length value without the raw parts is returned
            $uniqueString = \md5($uniqueString . $uniqueHelper);
        }

        return $uniqueString;
    }
2234
2235
    /**
     * alias for "UTF8::string_has_bom()"
     *
     * @see        UTF8::string_has_bom()
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *
     * @deprecated <p>use "UTF8::string_has_bom()"</p>
     */
    public static function hasBom(string $str): bool
    {
        $hasBom = self::string_has_bom($str);

        return $hasBom;
    }
2250
2251
    /**
2252
     * Returns true if the string contains a lower case char, false otherwise.
2253
     *
2254
     * @param string $str <p>The input string.</p>
2255
     *
2256
     * @return bool whether or not the string contains a lower case character
2257
     */
2258 47
    public static function has_lowercase(string $str): bool
    {
        // Any prefix followed by one character of the POSIX "lower" class.
        $pattern = '.*[[:lower:]]';

        // Without mbstring, delegate to the library's own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2267
2268
    /**
2269
     * Returns true if the string contains an upper case char, false otherwise.
2270
     *
2271
     * @param string $str <p>The input string.</p>
2272
     *
2273
     * @return bool whether or not the string contains an upper case character
2274
     */
2275 12
    public static function has_uppercase(string $str): bool
    {
        // Any prefix followed by one character of the POSIX "upper" class.
        $pattern = '.*[[:upper:]]';

        // Without mbstring, delegate to the library's own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
2284
2285
    /**
2286
     * Converts a hexadecimal-value into an UTF-8 character.
2287
     *
2288
     * @param string $hexdec <p>The hexadecimal value.</p>
2289
     *
2290
     * @return false|string one single UTF-8 character
2291
     */
2292 4
    public static function hex_to_chr(string $hexdec)
    {
        // Turn the hexadecimal notation into its numeric code point first ...
        $codePoint = \hexdec($hexdec);

        // ... and let decimal_to_chr() build the character from it.
        return self::decimal_to_chr($codePoint);
    }
2296
2297
    /**
2298
     * Converts hexadecimal U+xxxx code point representation to integer.
2299
     *
2300
     * INFO: opposite to UTF8::int_to_hex()
2301
     *
2302
     * @param string $hexDec <p>The hexadecimal code point representation.</p>
2303
     *
2304
     * @return false|int the code point, or false on failure
2305
     */
2306 2
    public static function hex_to_int($hexDec)
    {
        // The parameter is untyped, so normalize it to a string first.
        $hexDec = (string) $hexDec;

        if ($hexDec === '') {
            return false;
        }

        // Accept an optional "\u" or "U+" prefix (case-insensitive) followed by
        // 4 to 6 hexadecimal digits. Only [0-9a-f] is allowed here: letters
        // beyond "f" are not hexadecimal and would make intval() silently
        // return a truncated value (e.g. 0) instead of signalling failure.
        if (\preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2321
2322
    /**
2323
     * alias for "UTF8::html_entity_decode()"
2324
     *
2325
     * @see UTF8::html_entity_decode()
2326
     *
2327
     * @param string $str
2328
     * @param int    $flags
2329
     * @param string $encoding
2330
     *
2331
     * @return string
2332
     */
2333 4
    public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        // Alias: all arguments are handed through to html_entity_decode().
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }
2337
2338
    /**
2339
     * Converts a UTF-8 string to a series of HTML numbered entities.
2340
     *
2341
     * INFO: opposite to UTF8::html_decode()
2342
     *
2343
     * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
2344
     * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
2345
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
2346
     *
2347
     * @return string HTML numbered entities
2348
     */
2349 13
    public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // "UTF-8" and "CP850" are used as given; any other name is normalized.
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // When ASCII chars are kept, only code points from 0x80 upwards are encoded.
            $startCode = $keepAsciiChars === true ? 0x80 : 0x00;

            // A conversion map is a flat list of [start, end, offset, mask]
            // groups. It must contain a multiple of four elements: PHP 8
            // throws a ValueError otherwise, so no trailing fifth element.
            $convMap = [$startCode, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                return \mb_encode_numericentity($str, $convMap);
            }

            return \mb_encode_numericentity($str, $convMap, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // Split the string into characters and encode each one on its own.
        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keepAsciiChars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keepAsciiChars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2394
2395
    /**
2396
     * UTF-8 version of html_entity_decode()
2397
     *
2398
     * The reason we are not using html_entity_decode() by itself is because
2399
     * while it is not technically correct to leave out the semicolon
2400
     * at the end of an entity most browsers will still interpret the entity
2401
     * correctly. html_entity_decode() does not convert entities without
2402
     * semicolons, so we are left with our own little solution here. Bummer.
2403
     *
2404
     * Convert all HTML entities to their applicable characters
2405
     *
2406
     * INFO: opposite to UTF8::html_encode()
2407
     *
2408
     * @see http://php.net/manual/en/function.html-entity-decode.php
2409
     *
2410
     * @param string $str      <p>
2411
     *                         The input string.
2412
     *                         </p>
2413
     * @param int    $flags    [optional] <p>
2414
     *                         A bitmask of one or more of the following flags, which specify how to handle quotes
2415
     *                         and which document type to use. If omitted (null), ENT_QUOTES | ENT_HTML5 is used.
2416
     *                         <table>
2417
     *                         Available <i>flags</i> constants
2418
     *                         <tr valign="top">
2419
     *                         <td>Constant Name</td>
2420
     *                         <td>Description</td>
2421
     *                         </tr>
2422
     *                         <tr valign="top">
2423
     *                         <td><b>ENT_COMPAT</b></td>
2424
     *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2425
     *                         </tr>
2426
     *                         <tr valign="top">
2427
     *                         <td><b>ENT_QUOTES</b></td>
2428
     *                         <td>Will convert both double and single quotes.</td>
2429
     *                         </tr>
2430
     *                         <tr valign="top">
2431
     *                         <td><b>ENT_NOQUOTES</b></td>
2432
     *                         <td>Will leave both double and single quotes unconverted.</td>
2433
     *                         </tr>
2434
     *                         <tr valign="top">
2435
     *                         <td><b>ENT_HTML401</b></td>
2436
     *                         <td>
2437
     *                         Handle code as HTML 4.01.
2438
     *                         </td>
2439
     *                         </tr>
2440
     *                         <tr valign="top">
2441
     *                         <td><b>ENT_XML1</b></td>
2442
     *                         <td>
2443
     *                         Handle code as XML 1.
2444
     *                         </td>
2445
     *                         </tr>
2446
     *                         <tr valign="top">
2447
     *                         <td><b>ENT_XHTML</b></td>
2448
     *                         <td>
2449
     *                         Handle code as XHTML.
2450
     *                         </td>
2451
     *                         </tr>
2452
     *                         <tr valign="top">
2453
     *                         <td><b>ENT_HTML5</b></td>
2454
     *                         <td>
2455
     *                         Handle code as HTML 5.
2456
     *                         </td>
2457
     *                         </tr>
2458
     *                         </table>
2459
     *                         </p>
2460
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2461
     *
2462
     * @return string the decoded string
2463
     */
2464 42
    public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
    {
        // Fast path: nothing to decode if the string is shorter than four
        // bytes or does not contain an ampersand at all.
        if (
            !isset($str[3]) // examples: &; || &x;
            ||
            \strpos($str, '&') === false // no "&"
        ) {
            return $str;
        }

        // "UTF-8" and "CP850" are used as given; any other name is normalized.
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // No explicit flags: decode both quote styles and treat input as HTML5.
        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // A conversion map is a flat list of [start, end, offset, mask]
        // groups. It must contain a multiple of four elements: PHP 8 throws
        // a ValueError otherwise, so no trailing fifth element.
        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        $convMap = [0x80, 0xfffff, 0, 0xfffff];

        // Repeat until a pass leaves the string unchanged, so that entities
        // produced by a previous pass (e.g. double-encoded input) are decoded too.
        do {
            $str_compare = $str;

            if (self::$SUPPORT['mbstring'] === true) {
                if ($encoding === 'UTF-8') {
                    $str = \mb_decode_numericentity($str, $convMap);
                } else {
                    $str = \mb_decode_numericentity($str, $convMap, $encoding);
                }
            } else {
                $str = (string) \preg_replace_callback(
                    "/&#\d{2,6};/",
                    /**
                     * @param string[] $matches
                     *
                     * @return string
                     */
                    static function (array $matches) use ($encoding): string {
                        $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');
                        // Quote characters are left encoded at this stage.
                        if ($returnTmp !== '"' && $returnTmp !== "'") {
                            return $returnTmp;
                        }

                        return $matches[0];
                    },
                    $str
                );
            }

            if (\strpos($str, '&') !== false) {
                if (\strpos($str, '&#') !== false) {
                    // decode also numeric & UTF16 two byte entities:
                    // append the missing ";" to numeric entities that lack it
                    $str = (string) \preg_replace(
                        '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                        '$1;',
                        $str
                    );
                }

                $str = \html_entity_decode(
                    $str,
                    $flags,
                    $encoding
                );
            }
        } while ($str_compare !== $str);

        return $str;
    }
2551
2552
    /**
2553
     * Create a escape html version of the string via "UTF8::htmlspecialchars()".
2554
     *
2555
     * @param string $str
2556
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
2557
     *
2558
     * @return string
2559
     */
2560 6
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        // Escape both quote styles and substitute invalid code unit sequences.
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }
2568
2569
    /**
2570
     * Remove empty html-tag.
2571
     *
2572
     * e.g.: <tag></tag>
2573
     *
2574
     * @param string $str
2575
     *
2576
     * @return string
2577
     */
2578 1
    public static function html_stripe_empty_tags(string $str): string
    {
        // An opening tag (no "/" inside), optional whitespace, then a closing tag.
        $pattern = "/<[^\/>]*>(([\s]?)*|)<\/[^>]*>/i";

        // First attempt in UTF-8 mode, as before.
        $result = \preg_replace($pattern . 'u', '', $str);

        if ($result === null) {
            // preg_replace() returns null on error, e.g. when the subject is
            // not valid UTF-8. The pattern itself is pure ASCII, so retry
            // byte-wise instead of discarding the whole input.
            $result = \preg_replace($pattern, '', $str);
        }

        // If even the byte-wise attempt fails, hand back the input unchanged.
        return $result ?? $str;
    }
2586
2587
    /**
2588
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2589
     *
2590
     * @see http://php.net/manual/en/function.htmlentities.php
2591
     *
2592
     * @param string $str           <p>
2593
     *                              The input string.
2594
     *                              </p>
2595
     * @param int    $flags         [optional] <p>
2596
     *                              A bitmask of one or more of the following flags, which specify how to handle
2597
     *                              quotes, invalid code unit sequences and the used document type. The default is
2598
     *                              ENT_COMPAT | ENT_HTML401.
2599
     *                              <table>
2600
     *                              Available <i>flags</i> constants
2601
     *                              <tr valign="top">
2602
     *                              <td>Constant Name</td>
2603
     *                              <td>Description</td>
2604
     *                              </tr>
2605
     *                              <tr valign="top">
2606
     *                              <td><b>ENT_COMPAT</b></td>
2607
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2608
     *                              </tr>
2609
     *                              <tr valign="top">
2610
     *                              <td><b>ENT_QUOTES</b></td>
2611
     *                              <td>Will convert both double and single quotes.</td>
2612
     *                              </tr>
2613
     *                              <tr valign="top">
2614
     *                              <td><b>ENT_NOQUOTES</b></td>
2615
     *                              <td>Will leave both double and single quotes unconverted.</td>
2616
     *                              </tr>
2617
     *                              <tr valign="top">
2618
     *                              <td><b>ENT_IGNORE</b></td>
2619
     *                              <td>
2620
     *                              Silently discard invalid code unit sequences instead of returning
2621
     *                              an empty string. Using this flag is discouraged as it
2622
     *                              may have security implications.
2623
     *                              </td>
2624
     *                              </tr>
2625
     *                              <tr valign="top">
2626
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2627
     *                              <td>
2628
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2629
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
2630
     *                              string.
2631
     *                              </td>
2632
     *                              </tr>
2633
     *                              <tr valign="top">
2634
     *                              <td><b>ENT_DISALLOWED</b></td>
2635
     *                              <td>
2636
     *                              Replace invalid code points for the given document type with a
2637
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2638
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2639
     *                              instance, to ensure the well-formedness of XML documents with
2640
     *                              embedded external content.
2641
     *                              </td>
2642
     *                              </tr>
2643
     *                              <tr valign="top">
2644
     *                              <td><b>ENT_HTML401</b></td>
2645
     *                              <td>
2646
     *                              Handle code as HTML 4.01.
2647
     *                              </td>
2648
     *                              </tr>
2649
     *                              <tr valign="top">
2650
     *                              <td><b>ENT_XML1</b></td>
2651
     *                              <td>
2652
     *                              Handle code as XML 1.
2653
     *                              </td>
2654
     *                              </tr>
2655
     *                              <tr valign="top">
2656
     *                              <td><b>ENT_XHTML</b></td>
2657
     *                              <td>
2658
     *                              Handle code as XHTML.
2659
     *                              </td>
2660
     *                              </tr>
2661
     *                              <tr valign="top">
2662
     *                              <td><b>ENT_HTML5</b></td>
2663
     *                              <td>
2664
     *                              Handle code as HTML 5.
2665
     *                              </td>
2666
     *                              </tr>
2667
     *                              </table>
2668
     *                              </p>
2669
     * @param string $encoding      [optional] <p>
2670
     *                              Like <b>htmlspecialchars</b>,
2671
     *                              <b>htmlentities</b> takes an optional third argument
2672
     *                              <i>encoding</i> which defines encoding used in
2673
     *                              conversion.
2674
     *                              Although this argument is technically optional, you are highly
2675
     *                              encouraged to specify the correct value for your code.
2676
     *                              </p>
2677
     * @param bool   $double_encode [optional] <p>
2678
     *                              When <i>double_encode</i> is turned off PHP will not
2679
     *                              encode existing html entities. The default is to convert everything.
2680
     *                              </p>
2681
     *
2682
     * @return string
2683
     *                <p>
2684
     *                The encoded string.
2685
     *                <br><br>
2686
     *                If the input <i>string</i> contains an invalid code unit
2687
     *                sequence within the given <i>encoding</i> an empty string
2688
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2689
     *                <b>ENT_SUBSTITUTE</b> flags are set.
2690
     *                </p>
2691
     */
2692 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used as given; any other name is normalized.
        $needsNormalizing = $encoding !== 'UTF-8' && $encoding !== 'CP850';
        if ($needsNormalizing) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        /**
         * PHP's htmlentities() does not convert a backslash into an html
         * entity, so every backslash is replaced by its entity equivalent
         * right after the native conversion.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace(
            '\\',
            '&#92;',
            \htmlentities($str, $flags, $encoding, $double_encode)
        );

        // Finally turn the remaining non-ASCII chars into numbered entities.
        return self::html_encode($encoded, true, $encoding);
    }
2716
2717
    /**
2718
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2719
     *
2720
     * INFO: Take a look at "UTF8::htmlentities()"
2721
     *
2722
     * @see http://php.net/manual/en/function.htmlspecialchars.php
2723
     *
2724
     * @param string $str           <p>
2725
     *                              The string being converted.
2726
     *                              </p>
2727
     * @param int    $flags         [optional] <p>
2728
     *                              A bitmask of one or more of the following flags, which specify how to handle
2729
     *                              quotes, invalid code unit sequences and the used document type. The default is
2730
     *                              ENT_COMPAT | ENT_HTML401.
2731
     *                              <table>
2732
     *                              Available <i>flags</i> constants
2733
     *                              <tr valign="top">
2734
     *                              <td>Constant Name</td>
2735
     *                              <td>Description</td>
2736
     *                              </tr>
2737
     *                              <tr valign="top">
2738
     *                              <td><b>ENT_COMPAT</b></td>
2739
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2740
     *                              </tr>
2741
     *                              <tr valign="top">
2742
     *                              <td><b>ENT_QUOTES</b></td>
2743
     *                              <td>Will convert both double and single quotes.</td>
2744
     *                              </tr>
2745
     *                              <tr valign="top">
2746
     *                              <td><b>ENT_NOQUOTES</b></td>
2747
     *                              <td>Will leave both double and single quotes unconverted.</td>
2748
     *                              </tr>
2749
     *                              <tr valign="top">
2750
     *                              <td><b>ENT_IGNORE</b></td>
2751
     *                              <td>
2752
     *                              Silently discard invalid code unit sequences instead of returning
2753
     *                              an empty string. Using this flag is discouraged as it
2754
     *                              may have security implications.
2755
     *                              </td>
2756
     *                              </tr>
2757
     *                              <tr valign="top">
2758
     *                              <td><b>ENT_SUBSTITUTE</b></td>
2759
     *                              <td>
2760
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
2761
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
2762
     *                              string.
2763
     *                              </td>
2764
     *                              </tr>
2765
     *                              <tr valign="top">
2766
     *                              <td><b>ENT_DISALLOWED</b></td>
2767
     *                              <td>
2768
     *                              Replace invalid code points for the given document type with a
2769
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2770
     *                              (otherwise) instead of leaving them as is. This may be useful, for
2771
     *                              instance, to ensure the well-formedness of XML documents with
2772
     *                              embedded external content.
2773
     *                              </td>
2774
     *                              </tr>
2775
     *                              <tr valign="top">
2776
     *                              <td><b>ENT_HTML401</b></td>
2777
     *                              <td>
2778
     *                              Handle code as HTML 4.01.
2779
     *                              </td>
2780
     *                              </tr>
2781
     *                              <tr valign="top">
2782
     *                              <td><b>ENT_XML1</b></td>
2783
     *                              <td>
2784
     *                              Handle code as XML 1.
2785
     *                              </td>
2786
     *                              </tr>
2787
     *                              <tr valign="top">
2788
     *                              <td><b>ENT_XHTML</b></td>
2789
     *                              <td>
2790
     *                              Handle code as XHTML.
2791
     *                              </td>
2792
     *                              </tr>
2793
     *                              <tr valign="top">
2794
     *                              <td><b>ENT_HTML5</b></td>
2795
     *                              <td>
2796
     *                              Handle code as HTML 5.
2797
     *                              </td>
2798
     *                              </tr>
2799
     *                              </table>
2800
     *                              </p>
2801
     * @param string $encoding      [optional] <p>
2802
     *                              Defines encoding used in conversion.
2803
     *                              </p>
2804
     *                              <p>
2805
     *                              For the purposes of this function, the encodings
2806
     *                              ISO-8859-1, ISO-8859-15,
2807
     *                              UTF-8, cp866,
2808
     *                              cp1251, cp1252, and
2809
     *                              KOI8-R are effectively equivalent, provided the
2810
     *                              <i>string</i> itself is valid for the encoding, as
2811
     *                              the characters affected by <b>htmlspecialchars</b> occupy
2812
     *                              the same positions in all of these encodings.
2813
     *                              </p>
2814
     * @param bool   $double_encode [optional] <p>
2815
     *                              When <i>double_encode</i> is turned off PHP will not
2816
     *                              encode existing html entities, the default is to convert everything.
2817
     *                              </p>
2818
     *
2819
     * @return string the converted string.
2820
     *                </p>
2821
     *                <p>
2822
     *                If the input <i>string</i> contains an invalid code unit
2823
     *                sequence within the given <i>encoding</i> an empty string
2824
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
2825
     *                <b>ENT_SUBSTITUTE</b> flags are set
2826
     */
2827 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are used as given; any other name is normalized.
        $needsNormalizing = $encoding !== 'UTF-8' && $encoding !== 'CP850';
        if ($needsNormalizing) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // The conversion itself is done by the native function.
        $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

        return $converted;
    }
2839
2840
    /**
2841
     * Checks whether iconv is available on the server.
2842
     *
2843
     * @return bool
2844
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2845
     */
2846
    public static function iconv_loaded(): bool
    {
        // Ask the engine whether the "iconv" extension is loaded.
        $isLoaded = \extension_loaded('iconv');

        return $isLoaded;
    }
2850
2851
    /**
2852
     * alias for "UTF8::decimal_to_chr()"
2853
     *
2854
     * @see UTF8::decimal_to_chr()
2855
     *
2856
     * @param mixed $int
2857
     *
2858
     * @return string
2859
     */
2860 4
    public static function int_to_chr($int): string
    {
        // Alias: the value is handed through to decimal_to_chr() unchanged.
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
2864
2865
    /**
2866
     * Converts Integer to hexadecimal U+xxxx code point representation.
2867
     *
2868
     * INFO: opposite to UTF8::hex_to_int()
2869
     *
2870
     * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2871
     * @param string $pfix [optional]
2872
     *
2873
     * @return string the code point, or empty string on failure
2874
     */
2875 6
    public static function int_to_hex(int $int, string $pfix = 'U+'): string
    {
        // Hex digits, left-padded with zeros to a minimum width of four;
        // longer values are left untouched.
        $hexDigits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $pfix . $hexDigits;
    }
2883
2884
    /**
2885
     * Checks whether intl-char is available on the server.
2886
     *
2887
     * @return bool
2888
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2889
     */
2890
    public static function intlChar_loaded(): bool
    {
        // Availability is detected via the presence of the "IntlChar" class.
        $isAvailable = \class_exists('IntlChar');

        return $isAvailable;
    }
2894
2895
    /**
2896
     * Checks whether intl is available on the server.
2897
     *
2898
     * @return bool
2899
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
2900
     */
2901 5
    public static function intl_loaded(): bool
    {
        // Ask the engine whether the "intl" extension is loaded.
        $isLoaded = \extension_loaded('intl');

        return $isLoaded;
    }
2905
2906
    /**
2907
     * alias for "UTF8::is_ascii()"
2908
     *
2909
     * @see        UTF8::is_ascii()
2910
     *
2911
     * @param string $str
2912
     *
2913
     * @return bool
2914
     *
2915
     * @deprecated <p>use "UTF8::is_ascii()"</p>
2916
     */
2917 2
    public static function isAscii(string $str): bool
    {
        // Deprecated alias: forwards unchanged to is_ascii().
        $isAscii = self::is_ascii($str);

        return $isAscii;
    }
2921
2922
    /**
2923
     * alias for "UTF8::is_base64()"
2924
     *
2925
     * @see        UTF8::is_base64()
2926
     *
2927
     * @param string $str
2928
     *
2929
     * @return bool
2930
     *
2931
     * @deprecated <p>use "UTF8::is_base64()"</p>
2932
     */
2933 2
    public static function isBase64($str): bool
    {
        // Deprecated alias: forwards unchanged to is_base64().
        $isBase64 = self::is_base64($str);

        return $isBase64;
    }
2937
2938
    /**
2939
     * alias for "UTF8::is_binary()"
2940
     *
2941
     * @see        UTF8::is_binary()
2942
     *
2943
     * @param mixed $str
2944
     * @param bool  $strict
2945
     *
2946
     * @return bool
2947
     *
2948
     * @deprecated <p>use "UTF8::is_binary()"</p>
2949
     */
2950 4
    public static function isBinary($str, $strict = false): bool
    {
        // Deprecated alias: both arguments are forwarded to is_binary().
        $isBinary = self::is_binary($str, $strict);

        return $isBinary;
    }
2954
2955
    /**
     * Deprecated camelCase alias of "UTF8::is_bom()".
     *
     * @see        UTF8::is_bom()
     *
     * @param string $utf8_chr <p>The string to compare against the known byte order marks.</p>
     *
     * @return bool
     *              The result of "UTF8::is_bom()" for $utf8_chr
     *
     * @deprecated <p>use "UTF8::is_bom()"</p>
     */
    public static function isBom(string $utf8_chr): bool
    {
        // Pure delegation, no extra logic.
        return self::is_bom($utf8_chr);
    }
2970
2971
    /**
     * Deprecated camelCase alias of "UTF8::is_html()".
     *
     * @see        UTF8::is_html()
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              The result of "UTF8::is_html()" for $str
     *
     * @deprecated <p>use "UTF8::is_html()"</p>
     */
    public static function isHtml(string $str): bool
    {
        // Pure delegation, no extra logic.
        return self::is_html($str);
    }
2986
2987
    /**
     * Deprecated camelCase alias of "UTF8::is_json()".
     *
     * Only the input string is forwarded, so the delegate runs with its
     * default for the optional second argument.
     *
     * @see        UTF8::is_json()
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              The result of "UTF8::is_json()" for $str
     *
     * @deprecated <p>use "UTF8::is_json()"</p>
     */
    public static function isJson(string $str): bool
    {
        // Pure delegation, no extra logic.
        return self::is_json($str);
    }
3002
3003
    /**
     * Deprecated camelCase alias of "UTF8::is_utf16()".
     *
     * @see        UTF8::is_utf16()
     *
     * @param mixed $str <p>The input string.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     *
     * @deprecated <p>use "UTF8::is_utf16()"</p>
     */
    public static function isUtf16($str)
    {
        // Pure delegation, no extra logic.
        return self::is_utf16($str);
    }
3021
3022
    /**
     * Deprecated camelCase alias of "UTF8::is_utf32()".
     *
     * @see        UTF8::is_utf32()
     *
     * @param mixed $str <p>The input string.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     *
     * @deprecated <p>use "UTF8::is_utf32()"</p>
     */
    public static function isUtf32($str)
    {
        // Pure delegation, no extra logic.
        return self::is_utf32($str);
    }
3040
3041
    /**
     * Deprecated camelCase alias of "UTF8::is_utf8()".
     *
     * @see        UTF8::is_utf8()
     *
     * @param string|string[] $str    <p>The string (or list of strings) to be checked.</p>
     * @param bool            $strict <p>Forwarded unchanged to "UTF8::is_utf8()".</p>
     *
     * @return bool
     *              The result of "UTF8::is_utf8()"
     *
     * @deprecated <p>use "UTF8::is_utf8()"</p>
     */
    public static function isUtf8($str, $strict = false): bool
    {
        // Pure delegation, no extra logic.
        return self::is_utf8($str, $strict);
    }
3057
3058
    /**
     * Checks whether the string consists solely of alphabetic characters.
     *
     * The pattern also accepts an empty string, because it allows zero repetitions.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only alphabetic chars
     */
    public static function is_alpha(string $str): bool
    {
        $pattern = '^[[:alpha:]]*$';

        // Without mbstring, fall back to the class' own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3075
3076
    /**
     * Checks whether the string consists solely of alphabetic and numeric characters.
     *
     * The pattern also accepts an empty string, because it allows zero repetitions.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only alphanumeric chars
     */
    public static function is_alphanumeric(string $str): bool
    {
        $pattern = '^[[:alnum:]]*$';

        // Without mbstring, fall back to the class' own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3093
3094
    /**
     * Checks if a string consists only of the accepted ASCII bytes.
     *
     * Accepted are the printable range 0x20-0x7E plus the single bytes
     * 0x09, 0x0A, 0x0D, 0x10 and 0x13; any other byte makes the check fail.
     *
     * NOTE(review): 0x10 and 0x13 may have been intended as decimal 10 / 13
     * (LF / CR) - confirm before changing, callers may rely on the current result.
     *
     * @param string $str <p>The string to check.</p>
     *
     * @return bool
     *              <strong>true</strong> if no byte outside the accepted set was found
     *              (this includes the empty string)<br>
     *              <strong>false</strong> otherwise
     */
    public static function is_ascii(string $str): bool
    {
        return $str === ''
               ||
               !\preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);
    }
3111
3112
    /**
     * Checks whether the given value is a base64 encoded string.
     *
     * A value passes when strict base64 decoding succeeds and re-encoding
     * the decoded bytes reproduces the input exactly.
     *
     * @param mixed|string $str                <p>The input string.</p>
     * @param bool         $emptyStringIsValid [optional] <p>Is an empty string valid base64 or not?</p>
     *
     * @return bool whether or not $str is base64 encoded
     */
    public static function is_base64($str, $emptyStringIsValid = false): bool
    {
        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (\is_string($str) === false) {
            return false;
        }

        if ($str === '' && $emptyStringIsValid === false) {
            return false;
        }

        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // Round trip: rejects input that decodes, but is not in canonical form.
        return \base64_encode($decoded) === $str;
    }
3137
3138
    /**
     * Heuristic check whether the input looks like binary data.
     *
     * The input is cast to string and reported as binary when one of these holds:
     * - it consists only of the characters "0" and "1",
     * - "UTF8::get_file_type()" reports the type "binary",
     * - more than 25% of its bytes are NUL bytes,
     * - $strict is true and fileinfo reports the mime-encoding "binary".
     *
     * An empty string is never binary.
     *
     * @param mixed $input  <p>The value to check.</p>
     * @param bool  $strict <p>Also ask the fileinfo extension.</p>
     *
     * @throws \RuntimeException if $strict is true and fileinfo support is flagged as unavailable
     *
     * @return bool
     */
    public static function is_binary($input, bool $strict = false): bool
    {
        $input = (string) $input;
        if ($input === '') {
            return false;
        }

        // a string of bits, e.g. "01100001"
        if (\preg_match('~^[01]+$~', $input)) {
            return true;
        }

        $fileType = self::get_file_type($input);
        if ($fileType['type'] === 'binary') {
            return true;
        }

        // share of NUL bytes in the whole input
        $byteCount = \strlen($input);
        $nullByteCount = \substr_count($input, "\x0");
        if (($nullByteCount / $byteCount) > 0.25) {
            return true;
        }

        if ($strict !== true) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $mimeEncoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

        return $mimeEncoding === 'binary';
    }
3182
3183
    /**
     * Check if the file is binary.
     *
     * Only the first 512 bytes of the file are read and handed to
     * "UTF8::is_binary()" in strict mode.
     *
     * @param string $file <p>Path of the file to inspect.</p>
     *
     * @return bool
     *              <strong>false</strong> if the file cannot be opened or read, or if it is empty;
     *              otherwise the result of "UTF8::is_binary()" for the first block
     */
    public static function is_binary_file($file): bool
    {
        $fp = \fopen($file, 'rb');
        if (\is_resource($fp) === false) {
            return false;
        }

        $block = \fread($fp, 512);
        \fclose($fp);

        // fread() reports a failure with "false": handle it here instead of
        // passing a non-string on to is_binary().
        if ($block === false || $block === '') {
            return false;
        }

        return self::is_binary($block, true);
    }
3207
3208
    /**
     * Checks whether the string consists solely of whitespace characters.
     *
     * The pattern also accepts an empty string, because it allows zero repetitions.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only whitespace characters
     */
    public static function is_blank(string $str): bool
    {
        $pattern = '^[[:space:]]*$';

        // Without mbstring, fall back to the class' own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3225
3226
    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * The value is compared strictly against every key of the BOM table,
     * so it must be exactly a BOM - a string that merely starts with one does not match.
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise
     */
    public static function is_bom($str): bool
    {
        // Read-only lookup: no by-reference iteration over the shared static table.
        return \in_array($str, \array_keys(self::$BOM), true);
    }
3247
3248
    /**
     * Determine whether the given value is considered to be empty.
     *
     * The value counts as empty when it converts to boolean FALSE,
     * e.g. "", "0", 0, null or an empty array.
     *
     * @param mixed $str <p>The value to check.</p>
     *
     * @return bool whether or not $str is empty()
     */
    public static function is_empty($str): bool
    {
        // For a parameter that is always set, empty() is the negated boolean cast.
        return !$str;
    }
3262
3263
    /**
     * Checks whether the string consists solely of hexadecimal characters.
     *
     * The pattern also accepts an empty string, because it allows zero repetitions.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only hexadecimal chars
     */
    public static function is_hexadecimal(string $str): bool
    {
        $pattern = '^[[:xdigit:]]*$';

        // Without mbstring, fall back to the class' own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3280
3281
    /**
     * Check if the string contains at least one html-tag, e.g. "<lall>".
     *
     * Opening, closing and self-closing tags are recognised, with or without attributes.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if a tag was found, <strong>false</strong> otherwise
     *              (an empty string never contains a tag)
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        $tagPattern = "/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/";

        // preg_match() yields 1 only when a tag was found (0 = no match, false = error).
        return \preg_match($tagPattern, $str) === 1;
    }
3301
3302
    /**
     * Try to check if "$str" is a json-string.
     *
     * The string is decoded via "UTF8::json_decode()". A decoded value of null only
     * counts when the input itself is the literal "null" (compared case-insensitively).
     *
     * @param string $str                              <p>The input string.</p>
     * @param bool   $onlyArrayOrObjectResultsAreValid [optional] <p>Only array and objects are valid json results.</p>
     *
     * @throws \RuntimeException if json support is flagged as unavailable
     *
     * @return bool
     */
    public static function is_json(string $str, $onlyArrayOrObjectResultsAreValid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        if ($decoded === null && \strtoupper($str) !== 'NULL') {
            return false;
        }

        $isArrayOrObject = \is_object($decoded) || \is_array($decoded);
        if ($onlyArrayOrObjectResultsAreValid === true && !$isArrayOrObject) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
3338
3339
    /**
     * Checks whether the string consists solely of lower case characters.
     *
     * The pattern also accepts an empty string, because it allows zero repetitions.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only lower case characters
     */
    public static function is_lowercase(string $str): bool
    {
        $pattern = '^[[:lower:]]*$';

        // Without mbstring, fall back to the class' own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3353
3354
    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * The check is done by actually unserializing the string. Objects are never
     * instantiated while doing so ("allowed_classes" is disabled), so serialized
     * objects are still detected, but no class code of theirs is run.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool whether or not $str is serialized
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // A serialized "false" cannot be told apart from a failure below.
        if ($str === 'b:0;') {
            return true;
        }

        // unserialize() raises a notice for invalid input, hence the silence operator.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
3373
3374
    /**
     * Returns true if the string contains only upper case chars, false
     * otherwise.
     *
     * The pattern also accepts an empty string, because it allows zero repetitions.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              Whether or not $str contains only upper case characters
     */
    public static function is_uppercase(string $str): bool
    {
        $pattern = '^[[:upper:]]*$';

        // Without mbstring, fall back to the class' own pattern matcher.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3392
3393
    /**
     * Check if the string is UTF-16.
     *
     * For each byte order the BOM-stripped input is converted to UTF-8 and round-tripped
     * back. If the round trip is stable, the characters of the converted text are
     * scored against the result of "UTF8::count_chars()" for the input. The byte order
     * with the higher score wins; equal scores mean "not UTF-16".
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary <p>When true, input that "UTF8::is_binary()" (strict mode)
     *                                     does not report as binary is rejected right away.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // one score per byte order, little endian is evaluated first
        $scores = ['UTF-16LE' => 0, 'UTF-16BE' => 0];

        foreach (\array_keys($scores) as $encoding) {
            $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$asUtf8) {
                continue;
            }

            $backAgain = \mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
            $roundTrip = \mb_convert_encoding($backAgain, 'UTF-8', $encoding);
            if ($roundTrip !== $asUtf8) {
                continue;
            }

            // computed lazily and shared between both byte orders
            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true, false);
            }

            // Only the keys are needed, so there is no reason to iterate by reference.
            // NOTE(review): the keys are searched among the *values* of $strChars - if
            // count_chars() returns a "char => count" map this compares characters with
            // counts; confirm the intent before changing it.
            foreach (\array_keys(self::count_chars($roundTrip)) as $char) {
                if (\in_array($char, $strChars, true) === true) {
                    ++$scores[$encoding];
                }
            }
        }

        if ($scores['UTF-16BE'] === $scores['UTF-16LE']) {
            return false;
        }

        return $scores['UTF-16LE'] > $scores['UTF-16BE'] ? 1 : 2;
    }
3470
3471
    /**
     * Check if the string is UTF-32.
     *
     * For each byte order the BOM-stripped input is converted to UTF-8 and round-tripped
     * back. If the round trip is stable, the characters of the converted text are
     * scored against the result of "UTF8::count_chars()" for the input. The byte order
     * with the higher score wins; equal scores mean "not UTF-32".
     *
     * @param mixed $str                   <p>The input string.</p>
     * @param bool  $checkIfStringIsBinary <p>When true, input that "UTF8::is_binary()" (strict mode)
     *                                     does not report as binary is rejected right away.</p>
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, $checkIfStringIsBinary = true)
    {
        // init
        $str = (string) $str;
        $strChars = [];

        if (
            $checkIfStringIsBinary === true
            &&
            self::is_binary($str, true) === false
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // one score per byte order, little endian is evaluated first
        $scores = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

        foreach (\array_keys($scores) as $encoding) {
            $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
            if (!$asUtf8) {
                continue;
            }

            $backAgain = \mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
            $roundTrip = \mb_convert_encoding($backAgain, 'UTF-8', $encoding);
            if ($roundTrip !== $asUtf8) {
                continue;
            }

            // computed lazily and shared between both byte orders
            if (\count($strChars) === 0) {
                $strChars = self::count_chars($str, true, false);
            }

            // Only the keys are needed, so there is no reason to iterate by reference.
            // NOTE(review): the keys are searched among the *values* of $strChars - if
            // count_chars() returns a "char => count" map this compares characters with
            // counts; confirm the intent before changing it.
            foreach (\array_keys(self::count_chars($roundTrip)) as $char) {
                if (\in_array($char, $strChars, true) === true) {
                    ++$scores[$encoding];
                }
            }
        }

        if ($scores['UTF-32BE'] === $scores['UTF-32LE']) {
            return false;
        }

        return $scores['UTF-32LE'] > $scores['UTF-32BE'] ? 1 : 2;
    }
3548
3549
    /**
     * Encode a string with emoji chars into a non-emoji string.
     *
     * @param string $str                        <p>The input string</p>
     * @param bool   $useReversibleStringMapping [optional] <p>
     *                                           when <b>TRUE</b>, we use a reversible string mapping
     *                                           between "emoji_encode" and "emoji_decode"</p>
     *
     * @return string
     */
    public static function emoji_encode(string $str, bool $useReversibleStringMapping = false): string
    {
        self::initEmojiData();

        // pick the replacement table, the search table is always the emoji values
        $replacements = $useReversibleStringMapping === true
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            (array) $replacements,
            $str
        );
    }
3577
3578
    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * @param string $str                        <p>The input string.</p>
     * @param bool   $useReversibleStringMapping [optional] <p>
     *                                           When <b>TRUE</b>, we use a reversible string mapping
     *                                           between "emoji_encode" and "emoji_decode".</p>
     *
     * @return string
     */
    public static function emoji_decode(string $str, bool $useReversibleStringMapping = false): string
    {
        self::initEmojiData();

        // pick the search table, the replacement table is always the emoji values
        $placeholders = $useReversibleStringMapping === true
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) $placeholders,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }
3606
3607
    /**
     * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
     *
     * An array is accepted as well: every element must pass the check.
     * The validation rejects overlong (non-shortest) forms, 5 and 6 byte sequences,
     * surrogate code points, code points above 0x10FFFF and truncated sequences.
     *
     * @see    http://hsivonen.iki.fi/php-utf8/
     *
     * @param string|string[] $str    <p>The string to be checked.</p>
     * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str) === true) {
            // read-only iteration, no reference needed
            foreach ($str as $item) {
                if (self::is_utf8($item, $strict) === false) {
                    return false;
                }
            }

            return true;
        }

        // Normalise once, so that the byte-wise access below always works on a string.
        $str = (string) $str;
        if ($str === '') {
            return true;
        }

        if ($strict === true) {
            $isBinary = self::is_binary($str, true);

            if ($isBinary && self::is_utf16($str, false) !== false) {
                return false;
            }

            if ($isBinary && self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        // NOTE(review): the regex shortcut is taken when pcre_utf8_support() is *not*
        // true, which reads inverted compared to the comment below - confirm the intent.
        if (self::pcre_utf8_support() !== true) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^.{1}/us', $str) === 1;
        }

        $mState = 0; // cached expected number of octets after the current octet
        // until the beginning of the next UTF8 character sequence
        $mUcs4 = 0; // cached Unicode character
        $mBytes = 1; // cached expected number of octets in the current sequence

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $len = \strlen($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $len; ++$i) {
            $in = self::$ORD[$str[$i]];

            if ($mState === 0) {
                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if ((0x80 & $in) === 0) {
                    // US-ASCII, pass straight through.
                    $mBytes = 1;
                } elseif ((0xE0 & $in) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $mUcs4 = ($in & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;
                } elseif ((0xF0 & $in) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $mUcs4 = ($in & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;
                } elseif ((0xF8 & $in) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $mUcs4 = ($in & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;
                } elseif ((0xFC & $in) === 0xF8) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = ($in & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;
                } elseif ((0xFE & $in) === 0xFC) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = ($in & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;
                } else {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }
            } elseif ((0xC0 & $in) === 0x80) {
                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence

                // Legal continuation.
                $shift = ($mState - 1) * 6;
                $mUcs4 |= ($in & 0x0000003F) << $shift;

                // End of the multi-octet sequence: mUcs4 now contains the final
                // Unicode code point.
                if (--$mState === 0) {
                    // Check for illegal sequences and code points.
                    if (
                        // From Unicode 3.1, non-shortest form is illegal
                        ($mBytes === 2 && $mUcs4 < 0x0080)
                        ||
                        ($mBytes === 3 && $mUcs4 < 0x0800)
                        ||
                        ($mBytes === 4 && $mUcs4 < 0x10000)
                        ||
                        ($mBytes > 4)
                        ||
                        // From Unicode 3.2, surrogate characters are illegal.
                        (($mUcs4 & 0xFFFFF800) === 0xD800)
                        ||
                        // Code points outside the Unicode range are illegal.
                        ($mUcs4 > 0x10FFFF)
                    ) {
                        return false;
                    }

                    // reset the state for the next character
                    $mState = 0;
                    $mUcs4 = 0;
                    $mBytes = 1;
                }
            } else {
                // ((0xC0 & (*in) != 0x80) && (mState != 0))
                // Incomplete multi-octet sequence.
                return false;
            }
        }

        return true;
    }
3762
3763
    /**
     * Decodes a JSON string (wrapper around the native "json_decode()").
     *
     * The input is passed through "UTF8::filter()" before it is handed to the native function.
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>The <i>json</i> string being decoded.</p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>User specified recursion depth.</p>
     * @param int    $options [optional] <p>Bitmask of JSON decode options, forwarded unchanged.</p>
     *
     * @throws \RuntimeException if the json extension is flagged as unavailable in self::$SUPPORT
     *
     * @return mixed
     *               The value encoded in <i>json</i> in appropriate PHP type, exactly as returned
     *               by the native "json_decode()" (<b>NULL</b> if the input cannot be decoded).
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        // The input is filtered first, i.e. before the extension check below.
        $filteredJson = self::filter($json);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_decode($filteredJson, $assoc, $depth, $options);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
3813
3814
    /**
     * Returns the JSON representation of a value (wrapper around the native "json_encode()").
     *
     * The value is passed through "UTF8::filter()" before it is encoded.
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>
     *                       The <i>value</i> being encoded. Can be any type except
     *                       a resource.
     *                       </p>
     * @param int   $options [optional] <p>
     *                       Bitmask of JSON encode constants (e.g. <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_UNICODE</b>), forwarded unchanged.
     *                       </p>
     * @param int   $depth   [optional] <p>Set the maximum depth. Must be greater than zero.</p>
     *
     * @throws \RuntimeException if the json extension is flagged as unavailable in self::$SUPPORT
     *
     * @return false|string
     *                      A JSON encoded <strong>string</strong> on success or<br>
     *                      <strong>FALSE</strong> on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        // The value is filtered first, i.e. before the extension check below.
        $filteredValue = self::filter($value);

        if (self::$SUPPORT['json'] !== false) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \json_encode($filteredValue, $options, $depth);
        }

        throw new \RuntimeException('ext-json: is not installed');
    }
3863
3864
    /**
     * Checks whether JSON is available on the server.
     *
     * The check is done by testing for the native "json_decode()" function.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function json_loaded(): bool
    {
        $hasJsonDecode = \function_exists('json_decode');

        return $hasJsonDecode;
    }
3874
3875
    /**
     * Makes string's first char lowercase.
     *
     * @param string      $str                   <p>The input string</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        $isUtf8 = $encoding === 'UTF-8';

        // Fast path: plain UTF-8 without language or length special cases
        // can be handled by the native mbstring functions alone.
        if ($isUtf8 && $lang === null && $tryToKeepStringLength === false) {
            $rest = (string) \mb_substr($str, 1);
            $firstChar = (string) \mb_substr($str, 0, 1);

            return \mb_strtolower($firstChar) . $rest;
        }

        if ($isUtf8) {
            $rest = (string) \mb_substr($str, 1);
            $firstChar = (string) \mb_substr($str, 0, 1);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $rest = (string) self::substr($str, 1, null, $encoding);
            $firstChar = (string) self::substr($str, 0, 1, $encoding);
        }

        $loweredFirstChar = self::strtolower(
            $firstChar,
            $encoding,
            false,
            $lang,
            $tryToKeepStringLength
        );

        return $loweredFirstChar . $rest;
    }
3931
3932
    /**
     * alias for "UTF8::lcfirst()"
     *
     * All arguments are forwarded unchanged.
     *
     * @see UTF8::lcfirst()
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $result = self::lcfirst(
            $str,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );

        return $result;
    }
3954
3955
    /**
     * Lowercase the first char of all words in the string.
     *
     * The string is split via "UTF8::str_to_words()", every non-empty part that is not
     * listed in $exceptions is passed to "UTF8::lcfirst()" and the parts are glued
     * together again without a separator.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string[]    $exceptions            [optional] <p>Exclusion for some words (compared strictly).</p>
     * @param string      $charlist              [optional] <p>Additional chars that belong to words and do not
     *                                           start a new word.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // Strict comparison on purpose: a loose "!$str" check would also
        // swallow the valid input "0" and return an empty string for it.
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = $exceptions !== [];

        // Write back by index instead of iterating by reference, so no
        // dangling reference to the last element is left behind.
        foreach ($words as $index => $word) {
            if ($word === '') {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words[$index] = self::lcfirst($word, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
            }
        }

        return \implode('', $words);
    }
4001
4002
    /**
     * alias for "UTF8::lcfirst()"
     *
     * All arguments are forwarded unchanged.
     *
     * @see UTF8::lcfirst()
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $result = self::lcfirst(
            $str,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );

        return $result;
    }
4024
4025
    /**
     * Strip whitespace or other characters from beginning of a UTF-8 string.
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped; <b>null</b> or an empty string
     *                           means "strip whitespace". A list such as "0" is honoured.</p>
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Compare explicitly against null / '': a plain truthiness check would treat
        // the valid character list "0" as "not given" and strip whitespace instead.
        if ($chars !== null && $chars !== '') {
            $quotedChars = \preg_quote($chars, '/');
            // "{$var}" instead of "${var}": the latter form is deprecated since PHP 8.2.
            $pattern = "^[{$quotedChars}]+";
        } else {
            $pattern = '^[\s]+';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // Fallback without mbstring: delegate to the library's own regex helper.
        return self::regex_replace($str, $pattern, '', '', '/');
    }
4053
4054
    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * An array argument is concatenated into one string before the code points are collected.
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @return string|null the character with the highest code point, returns null on failure or empty input
     */
    public static function max($arg)
    {
        $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input, false);

        return \count($codepoints) === 0
            ? null
            : self::chr(\max($codepoints));
    }
4076
4077
    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @return int max byte lengths of the given chars, 0 if "UTF8::chr_size_list()" yields no entries
     */
    public static function max_chr_width(string $str): int
    {
        $sizes = self::chr_size_list($str);

        return \count($sizes) > 0 ? (int) \max($sizes) : 0;
    }
4094
4095
    /**
     * Checks whether mbstring is available on the server.
     *
     * The check asks PHP whether the "mbstring" extension is loaded.
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function mbstring_loaded(): bool
    {
        $isLoaded = \extension_loaded('mbstring');

        return $isLoaded;
    }
4105
4106
    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * An array argument is concatenated into one string before the code points are collected.
     *
     * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @return string|null the character with the lowest code point, returns null on failure or empty input
     */
    public static function min($arg)
    {
        $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($input, false);

        return \count($codepoints) === 0
            ? null
            : self::chr(\min($codepoints));
    }
4128
4129
    /**
     * alias for "UTF8::normalize_encoding()"
     *
     * Both arguments are forwarded unchanged.
     *
     * @see        UTF8::normalize_encoding()
     *
     * @param mixed $encoding
     * @param mixed $fallback
     *
     * @return mixed
     *
     * @deprecated <p>use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4145
4146
    /**
     * Normalize the encoding-"name" input.
     *
     * Lookup order: a few hard-coded fast paths, the per-request static cache,
     * the list loaded via "getData('encodings')" and finally a table of well-known
     * aliases (matched against the upper-cased name without punctuation).
     * Names that match nothing are returned upper-cased.
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        // init
        $encoding = (string) $encoding;

        // "" and "0" are the only falsy strings
        if ($encoding === '' || $encoding === '0') {
            return $fallback;
        }

        if (\in_array($encoding, ['UTF-8', 'UTF8'], true)) {
            return 'UTF-8';
        }

        if (\in_array($encoding, ['8BIT', 'BINARY'], true)) {
            return 'CP850';
        }

        if (\in_array($encoding, ['HTML', 'HTML-ENTITIES'], true)) {
            return 'HTML-ENTITIES';
        }

        // only a fallback, for non "strict_types" usage ...
        if ($encoding === '1') {
            return $fallback;
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // already a known name: remember and return it unchanged
        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        $upperEncoding = \strtoupper($encoding);
        // alias lookup key: only letters, digits and whitespace survive
        $aliasKey = (string) \preg_replace('/[^a-zA-Z0-9\s]/', '', $upperEncoding);

        $equivalences = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // Unknown aliases fall back to the upper-cased input.
        $normalized = $equivalences[$aliasKey] ?? $upperEncoding;

        // The cache is keyed by the name exactly as it was passed in.
        $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $normalized;

        return $normalized;
    }
4289
4290
    /**
     * Standardize line ending to unix-like.
     *
     * "\r\n" pairs are converted first, then every remaining lone "\r".
     *
     * @param string $str
     *
     * @return string
     */
    public static function normalize_line_ending(string $str): string
    {
        $withoutCrLf = \str_replace("\r\n", "\n", $str);

        return \str_replace("\r", "\n", $withoutCrLf);
    }
4301
4302
    /**
     * Normalize some MS Word special characters.
     *
     * Typographic quotes, guillemets, dashes and the ellipsis are replaced
     * by their plain ASCII counterparts.
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @return string
     */
    public static function normalize_msword(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // UTF-8 byte sequence => ASCII replacement
        $replacements = [
            "\xc2\xab"     => '"', // « (U+00AB)
            "\xc2\xbb"     => '"', // » (U+00BB)
            "\xe2\x80\x98" => "'", // ‘ (U+2018)
            "\xe2\x80\x99" => "'", // ’ (U+2019)
            "\xe2\x80\x9a" => "'", // ‚ (U+201A)
            "\xe2\x80\x9b" => "'", // ‛ (U+201B)
            "\xe2\x80\x9c" => '"', // “ (U+201C)
            "\xe2\x80\x9d" => '"', // ” (U+201D)
            "\xe2\x80\x9e" => '"', // „ (U+201E)
            "\xe2\x80\x9f" => '"', // ‟ (U+201F)
            "\xe2\x80\xb9" => "'", // ‹ (U+2039)
            "\xe2\x80\xba" => "'", // › (U+203A)
            "\xe2\x80\x93" => '-', // – (U+2013)
            "\xe2\x80\x94" => '-', // — (U+2014)
            "\xe2\x80\xa6" => '...', // … (U+2026)
        ];

        return \str_replace(
            \array_keys($replacements),
            \array_values($replacements),
            $str
        );
    }
4353
4354
    /**
     * Normalize the whitespace.
     *
     * Every entry of the whitespace table is replaced by a plain space; unless requested
     * otherwise, the bidirectional control chars are removed beforehand.
     *
     * @param string $str                     <p>The string to be normalized.</p>
     * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
     *                                        bidirectional text chars.</p>
     *
     * @return string
     */
    public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
    {
        if ($str === '') {
            return '';
        }

        // one cached search list per $keepNonBreakingSpace variant (key 0 / 1)
        static $WHITESPACE_CACHE = [];
        static $BIDI_UNICODE_CONTROLS_CACHE = null;

        $cacheKey = (int) $keepNonBreakingSpace;

        if (!isset($WHITESPACE_CACHE[$cacheKey])) {
            $whitespaceTable = self::$WHITESPACE_TABLE;

            if ($keepNonBreakingSpace === true) {
                unset($whitespaceTable['NO-BREAK SPACE']);
            }

            $WHITESPACE_CACHE[$cacheKey] = \array_values($whitespaceTable);
        }

        if ($keepBidiUnicodeControls === false) {
            if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
                $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
            }

            // bidi controls are dropped before the whitespace replacement runs
            $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
        }

        return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
    }
4395
4396
    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * Resolution order: static result cache, the lookup table loaded via "getData('ord')",
     * "IntlChar::ord()" (if flagged as available) and finally a manual decode of the
     * first (up to four) bytes.
     *
     * @param string $chr      <p>The character of which to calculate code point.<p/>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             Unicode code point of the given character,<br>
     *             0 on invalid UTF-8 byte sequence
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        static $CHAR_CACHE = [];

        // init
        $chr = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // The cache key is built from the input as given, before any re-encoding.
        $cacheKey = $chr . $encoding;
        if (isset($CHAR_CACHE[$cacheKey]) === true) {
            return $CHAR_CACHE[$cacheKey];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            $CHAR_CACHE[$cacheKey] = self::$ORD[$chr];

            return $CHAR_CACHE[$cacheKey];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intlCode = \IntlChar::ord($chr);
            if ($intlCode) {
                $CHAR_CACHE[$cacheKey] = $intlCode;

                return $CHAR_CACHE[$cacheKey];
            }
        }

        //
        // fallback via vanilla php
        //

        // 1-based list of the first (max. four) byte values
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        $leadByte = $bytes ? $bytes[1] : 0;

        if ($leadByte >= 0xF0 && isset($bytes[4])) {
            // four byte sequence
            /** @noinspection UnnecessaryCastingInspection */
            $codePoint = (int) ((($leadByte - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        } elseif ($leadByte >= 0xE0 && isset($bytes[3])) {
            // three byte sequence
            /** @noinspection UnnecessaryCastingInspection */
            $codePoint = (int) ((($leadByte - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        } elseif ($leadByte >= 0xC0 && isset($bytes[2])) {
            // two byte sequence
            /** @noinspection UnnecessaryCastingInspection */
            $codePoint = (int) ((($leadByte - 0xC0) << 6) + $bytes[2] - 0x80);
        } else {
            // single byte (or nothing to decode): use the first byte value as is
            $codePoint = $leadByte;
        }

        $CHAR_CACHE[$cacheKey] = $codePoint;

        return $codePoint;
    }
4474
4475
    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike the native "parse_str()", this method never places variables in the
     *          current scope; the result is always written into the second parameter.
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str       <p>The input string.</p>
     * @param array  $result    <p>The result will be returned into this reference parameter.</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return bool
     *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result
     */
    public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
    {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (self::$SUPPORT['mbstring'] !== true) {
            // without mbstring: native parser, success is judged by the result only
            /** @noinspection PhpVoidFunctionResultUsedInspection */
            \parse_str($str, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($str, $result);

        return $parsed !== false && $result !== [];
    }
4507
4508
    /**
     * Checks if \u modifier is available that enables Unicode support in PCRE.
     *
     * The check runs an empty pattern with the "u" modifier against an empty string
     * (warnings suppressed) and casts the outcome to bool.
     *
     * @return bool
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     */
    public static function pcre_utf8_support(): bool
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $matchResult = @\preg_match('//u', '');

        return (bool) $matchResult;
    }
4520
4521
    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * Each boundary is turned into a code point: all-digit values are used as decimal numbers,
     * other hexadecimal-looking values go through "UTF8::hex_to_int()", everything else
     * through "UTF8::ord()". A falsy boundary or a resulting code point of 0 yields an empty array.
     *
     * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     *
     * @throws \RuntimeException if the ctype extension is flagged as unavailable in self::$SUPPORT
     *
     * @return string[]
     */
    public static function range($var1, $var2): array
    {
        if (!$var1 || !$var2) {
            return [];
        }

        if (self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // Shared conversion for both boundaries. The value is cast to string before it is
        // handed to the ctype functions: passing non-string arguments to them is deprecated
        // since PHP 8.1 (and small integers would be interpreted as ASCII codes).
        $toCodePoint = static function ($boundary): int {
            $boundaryAsString = (string) $boundary;

            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_digit($boundaryAsString)) {
                return (int) $boundary;
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            if (\ctype_xdigit($boundaryAsString)) {
                return (int) self::hex_to_int($boundaryAsString);
            }

            return self::ord($boundary);
        };

        $start = $toCodePoint($var1);
        if (!$start) {
            return [];
        }

        $end = $toCodePoint($var2);
        if (!$end) {
            return [];
        }

        return \array_map(
            static function (int $i): string {
                return (string) self::chr($i);
            },
            \range($start, $end)
        );
    }
4572
4573
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible; if false, exactly one decoding pass is done.</p>
     *
     * @return string
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // Fast path: nothing that could be url- or entity-encoded.
        if (
            \strpos($str, '&') === false
            &&
            \strpos($str, '%') === false
            &&
            \strpos($str, '+') === false
            &&
            \strpos($str, '\u') === false
        ) {
            return self::fix_simple_utf8($str);
        }

        // Turn "%uXXXX" escapes into hexadecimal html entities, so that the
        // entity decoding below can resolve them.
        $pattern = '/%u([0-9a-fA-F]{3,4})/';
        if (\preg_match($pattern, $str)) {
            $str = (string) \preg_replace($pattern, '&#x\\1;', \rawurldecode($str));
        }

        $flags = \ENT_QUOTES | \ENT_HTML5;

        // Always decode once; keep going until the string is stable only when
        // $multi_decode is requested. (Previously a single-pass call decoded nothing.)
        do {
            $str_compare = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::fix_simple_utf8(
                \rawurldecode(
                    self::html_entity_decode(
                        self::to_utf8($str),
                        $flags
                    )
                )
            );
        } while ($multi_decode === true && $str_compare !== $str);

        return $str;
    }
4637
4638
    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * The "u" modifier is always added to the pattern; the option string
     * "msr" is mapped to "ms".
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
     * @param string $delimiter   [optional] <p>Delimiter of the regex. Default: '/'</p>
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fall back to the default delimiter if an empty one was given
        $boundary = $delimiter ?: '/';

        $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

        return (string) \preg_replace($regex, $replacement, $str);
    }
4671
4672
    /**
     * Alias of "UTF8::remove_bom()", kept for backward compatibility.
     *
     * @see        UTF8::remove_bom()
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string the result of "UTF8::remove_bom()" for $str
     *
     * @deprecated <p>use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        $withoutBom = self::remove_bom($str);

        return $withoutBom;
    }
4687
4688
    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * Every entry of self::$BOM (BOM bytes => byte length) is checked in turn
     * against the current start of the string, so a leading BOM that becomes
     * visible after an earlier one was stripped is removed as well.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string without UTF-BOM
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        foreach (self::$BOM as $bomString => $bomByteLength) {
            $bomString = (string) $bomString;

            // Compare only the leading bytes; a full strpos() would scan the
            // whole string for every BOM candidate that is not present.
            if (\strncmp($str, $bomString, \strlen($bomString)) !== 0) {
                continue;
            }

            $strTmp = \substr($str, (int) $bomByteLength);
            if ($strTmp === false) {
                return '';
            }

            $str = (string) $strTmp;
        }

        return $str;
    }
4717
4718
    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * Every run of directly repeated $what is collapsed into a single $what,
     * e.g. "foo   bar" with $what = ' ' becomes "foo bar".
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @return string the result string with removed duplicates
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what) === true) {
            $what = [$what];
        }

        if (\is_array($what) === true) {
            /** @noinspection ForeachSourceInspection */
            foreach ($what as $item) {
                $item = (string) $item;

                // an empty needle cannot be duplicated
                if ($item === '') {
                    continue;
                }

                // A callback is used so that "$item" is inserted literally; as a
                // plain replacement string "$0", "$1" or "\1" would be treated
                // as back-references.
                $str = (string) \preg_replace_callback(
                    '/(?:' . \preg_quote($item, '/') . ')+/',
                    static function () use ($item): string {
                        return $item;
                    },
                    $str
                );
            }
        }

        return $str;
    }
4741
4742
    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $allowableTags [optional] <p>Tags which should not be stripped, e.g. "&lt;b&gt;&lt;i&gt;".
     *                              Default: '' (strip all tags)
     *                              </p>
     *
     * @return string
     */
    public static function remove_html(string $str, string $allowableTags = ''): string
    {
        $withoutTags = \strip_tags($str, $allowableTags);

        return $withoutTags;
    }
4756
4757
    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * A "\r\n" pair counts as one break, so it is replaced by a single $replacement.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is a empty string.</p>
     *
     * @return string
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // "\r\n" must come first so that the pair is consumed as one break;
        // "<br\b[^>]*>" matches "<br>", "<br/>", "<br />", "<br class=...>" (any case)
        // but not other tags that merely start with "br" (e.g. "<brand>").
        return (string) \preg_replace('#\r\n|\r|\n|<br\b[^>]*>#i', $replacement, $str);
    }
4769
4770
    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * @param string $str         <p>The input string.</p>
     * @param bool   $url_encoded [optional] <p>Also remove the url encoded form ("%00" - "%1F", except tab, newline
     *                            and carriage return) of the control characters. Default: true</p>
     * @param string $replacement [optional] <p>The string that is inserted for every removed sequence. Default: ''</p>
     *
     * @return string
     */
    public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
    {
        // every control character except newline (dec 10),
        // carriage return (dec 13) and horizontal tab (dec 09)
        $rawControlChars = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

        if ($url_encoded) {
            $patterns = [
                '/%0[0-8bcefBCEF]/', // url encoded 00-08, 11, 12, 14, 15
                '/%1[0-9a-fA-F]/', // url encoded 16-31
                $rawControlChars,
            ];
        } else {
            $patterns = [$rawControlChars];
        }

        // Repeat until a pass changes nothing: removing one sequence can join
        // the surrounding text into a new matching sequence.
        $replaced = 0;
        do {
            $str = (string) \preg_replace($patterns, $replacement, $str, -1, $replaced);
        } while ($replaced !== 0);

        return $str;
    }
4803
4804
    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string without the prefix $substring
     */
    public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // Compare against '' explicitly: a truthiness check would also reject the
        // prefix "0". Only the leading bytes are compared (no full-string scan).
        if (
            $substring === ''
            ||
            \strncmp($str, $substring, \strlen($substring)) !== 0
        ) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
4835
4836
    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string having a $str without the suffix $substring
     */
    public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
    {
        // Compare against '' explicitly: a truthiness check would also reject the suffix "0".
        if (
            $substring === ''
            ||
            \substr($str, -\strlen($substring)) !== $substring
        ) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
4868
4869
    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $search        <p>The needle to search for.</p>
     * @param string $replacement   <p>The string to replace with.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $caseSensitive = true
    ): string {
        // the case-insensitive variant is delegated to the UTF-8 aware helper
        if (!$caseSensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
4891
4892
    /**
     * Replaces all occurrences of the $search elements in $str by $replacement.
     *
     * @param string       $str           <p>The input string.</p>
     * @param array        $search        <p>The elements to search for.</p>
     * @param array|string $replacement   <p>The string (or the list of strings) to replace with.</p>
     * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return string string after the replacements
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $caseSensitive = true
    ): string {
        // the case-insensitive variant is delegated to the UTF-8 aware helper
        if (!$caseSensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }
4914
4915
    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * @param string $str                <p>The input string</p>
     * @param string $replacementChar    [optional] <p>The replacement; an empty string removes the characters.</p>
     * @param bool   $processInvalidUtf8 [optional] <p>Also replace invalid UTF-8 sequences. Default: true</p>
     *
     * @return string
     */
    public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
    {
        if ($str === '') {
            return '';
        }

        if ($processInvalidUtf8 === true) {
            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            // "mb_substitute_character()" only accepts "none", "long", "entity" or
            // a code point, never an arbitrary string. So invalid sequences are
            // either dropped ("none") or turned into U+FFFD, which the final
            // str_replace() below maps to the requested replacement.
            $substitute = $replacementChar === '' ? 'none' : 0xFFFD;

            $save = \mb_substitute_character();
            try {
                \mb_substitute_character($substitute);
                // the polyfill maybe return false, so cast to string
                $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            } finally {
                // always restore the global setting, even if the conversion throws
                \mb_substitute_character($save);
            }
        }

        return \str_replace(
            [
                "\xEF\xBF\xBD",
                '�',
            ],
            [
                $replacementChar,
                $replacementChar,
            ],
            $str
        );
    }
4961
4962
    /**
     * Strip whitespace or other characters from end of a UTF-8 string.
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars [optional] <p>Characters to be stripped; null or an empty string strips
     *                           whitespace.</p>
     *
     * @return string the string with unwanted characters stripped from the right
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Compare explicitly instead of by truthiness, otherwise $chars = "0"
        // would silently fall back to whitespace trimming.
        //
        // "\z" anchors at the very end of the string; "$" can also match just
        // before a trailing line break and would then strip characters that
        // are not at the end.
        if ($chars !== null && $chars !== '') {
            $pattern = '[' . \preg_quote($chars, '/') . ']+\z';
        } else {
            $pattern = '[\s]+\z';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
4990
4991
    /**
     * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
     *
     * Echoes every entry of self::$SUPPORT as "key - value" inside a "<pre>" block.
     */
    public static function showSupport()
    {
        $report = '';
        foreach (self::$SUPPORT as $feature => $state) {
            $report .= $feature . ' - ' . \print_r($state, true) . "\n<br>";
        }

        echo '<pre>' . $report . '</pre>';
    }
5003
5004
    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keepAsciiChars [optional] <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the HTML numbered entity, the unchanged ASCII char or an empty string for empty input
     */
    public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
    {
        if ($char === '') {
            return '';
        }

        // ASCII input is passed through untouched when requested
        $keepAsIs = $keepAsciiChars === true && self::is_ascii($char) === true;
        if ($keepAsIs) {
            return $char;
        }

        $codePoint = self::ord($char, $encoding);

        return '&#' . $codePoint . ';';
    }
5029
5030
    /**
     * Replaces every run of $tabLength spaces with one tab character.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength [optional] <p>Number of spaces that make up one tab. Default: 4</p>
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tabLength = 4): string
    {
        // one "tab stop" worth of spaces
        $spaces = \str_repeat(' ', $tabLength);

        return \str_replace($spaces, "\t", $str);
    }
5048
5049
    /**
     * Convert a string to an array of Unicode characters.
     *
     * If an array is given, every element is split on its own and the array
     * (with the same keys) is returned.
     *
     * @param int|int[]|string|string[] $str                <p>The string to split into array.</p>
     * @param int                       $length             [optional] <p>Max character length of each array
     *                                                      element.</p>
     * @param bool                      $cleanUtf8          [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool                      $tryToUseMbFunction [optional] <p>Set to false, if you don't want to use
     *                                                      "mb_substr"</p>
     *
     * @return array
     *               <p>An array containing chunks of the input; empty if $length is smaller than 1 or the input is
     *               empty.</p>
     */
    public static function str_split(
        $str,
        int $length = 1,
        bool $cleanUtf8 = false,
        bool $tryToUseMbFunction = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        if (\is_array($str) === true) {
            foreach ($str as $k => $v) {
                $str[$k] = self::str_split(
                    $v,
                    $length,
                    $cleanUtf8,
                    $tryToUseMbFunction
                );
            }

            return $str;
        }

        // init
        $str = (string) $str;

        if ($str === '') {
            return [];
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        if (
            $tryToUseMbFunction === true
            &&
            self::$SUPPORT['mbstring'] === true
        ) {
            $iMax = \mb_strlen($str);
            if ($iMax <= 127) {
                // short strings: one "mb_substr()" call per character
                $ret = [];
                for ($i = 0; $i < $iMax; ++$i) {
                    $ret[] = \mb_substr($str, $i, 1);
                }
            } else {
                // longer strings: let PCRE split the string in one pass
                $retArray = [];
                \preg_match_all('/./us', $str, $retArray);
                $ret = $retArray[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $retArray = [];
            \preg_match_all('/./us', $str, $retArray);
            $ret = $retArray[0] ?? [];
        } else {

            // fallback: decode the UTF-8 sequences byte by byte; a lead byte whose
            // continuation bytes do not fit is skipped without adding an element

            $ret = [];
            $len = \strlen($str);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($str[$i] & "\x80") === "\x00") {
                    // 1 byte (ASCII)
                    $ret[] = $str[$i];
                } elseif (
                    isset($str[$i + 1])
                    &&
                    ($str[$i] & "\xE0") === "\xC0"
                ) {
                    // 2 byte sequence
                    if (($str[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $str[$i] . $str[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($str[$i + 2])
                    &&
                    ($str[$i] & "\xF0") === "\xE0"
                ) {
                    // 3 byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($str[$i + 3])
                    &&
                    ($str[$i] & "\xF8") === "\xF0"
                ) {
                    // 4 byte sequence
                    if (
                        ($str[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($str[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            // The callback takes its argument by value: "array_map()" passes
            // values, and a by-reference parameter triggers a warning on PHP 8.
            return \array_map(
                static function (array $item): string {
                    return \implode('', $item);
                },
                \array_chunk($ret, $length)
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
5188
5189
    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$isKnownEncoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // lower-case the first character and drop leading dashes / underscores
        $camel = self::lcfirst(
            \trim($str),
            $encoding,
            false,
            $lang,
            $tryToKeepStringLength
        );
        $camel = (string) \preg_replace('/^[-_]+/', '', $camel);

        // the plain "mb_strtoupper()" is only used if no language specific handling was requested
        $useMbFunction = $lang === null && $tryToKeepStringLength === false;

        /**
         * @param string $text
         * @param bool   $clean
         *
         * @return string
         */
        $toUpper = static function (string $text, bool $clean) use ($useMbFunction, $encoding, $lang, $tryToKeepStringLength): string {
            if ($useMbFunction !== true) {
                return self::strtoupper($text, $encoding, $clean, $lang, $tryToKeepStringLength);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($text);
            }

            return \mb_strtoupper($text, $encoding);
        };

        // remove the separators and upper-case the character that follows them
        $camel = (string) \preg_replace_callback(
            '/[-_\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($toUpper): string {
                if (!isset($match[1])) {
                    return '';
                }

                return $toUpper($match[1], false);
            },
            $camel
        );

        // upper-case the character that follows a run of digits
        return (string) \preg_replace_callback(
            '/[\d]+(.)?/u',
            /**
             * @param array $match
             *
             * @return string
             */
            static function (array $match) use ($toUpper, $cleanUtf8): string {
                return $toUpper($match[0], $cleanUtf8);
            },
            $camel
        );
    }
5274
5275
    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * The whitespace is collapsed first, then the helper is applied with a
     * space and afterwards with a dash as word separator.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string string with $str capitalized
     */
    public static function str_capitalize_name(string $str): string
    {
        $collapsed = self::collapse_whitespace($str);
        $bySpace = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($bySpace, '-');
    }
5293
5294
    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $caseSensitive to false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param string $needle        <p>Substring to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool whether or not $haystack contains $needle
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $caseSensitive = true
    ): bool {
        $position = $caseSensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        return $position !== false;
    }
5316
5317
    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $caseSensitive to false.
     *
     * An empty haystack, an empty list of needles or an empty needle always
     * results in false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool whether or not $haystack contains all $needles
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $caseSensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            if ($needle === '') {
                return false;
            }

            $position = $caseSensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            // one missing needle is enough to fail; otherwise keep checking the rest
            if ($position === false) {
                return false;
            }
        }

        return true;
    }
5352
5353
    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $caseSensitive to false.
     *
     * Empty needles are ignored; an empty haystack or an empty list of
     * needles always results in false.
     *
     * @param string $haystack      <p>The input string.</p>
     * @param array  $needles       <p>SubStrings to look for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @return bool
     *              Whether or not $haystack contains at least one of the $needles
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $caseSensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            $needle = (string) $needle;

            // an empty needle can never be "found", try the next one
            if ($needle === '') {
                continue;
            }

            $position = $caseSensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            // the first hit is enough; otherwise keep checking the rest
            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
5389
5390
    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * This is "UTF8::str_delimit()" with "-" as delimiter.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        $dasherized = self::str_delimit($str, '-', $encoding);

        return $dasherized;
    }
5404
5405
    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     *
     * The work is done in three steps: a "-" marker is put in front of every
     * A-Z character that does not sit on a word boundary, the string is
     * lower-cased, and finally every run of dashes, underscores and whitespace
     * is replaced by the delimiter.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $delimiter             <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                           tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                           ß</p>
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // decide once which regex engine is used for both replacement steps
        $hasMbRegex = self::$SUPPORT['mbstring'] === true;

        // step 1: mark inner upper-case characters
        $trimmed = \trim($str);
        if ($hasMbRegex) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $str = (string) \mb_ereg_replace('\B([A-Z])', '-\1', $trimmed);
        } else {
            $str = (string) \preg_replace('/\B([A-Z])/u', '-\1', $trimmed);
        }

        // step 2: lower-case (plain "mb_strtolower()" is only used when no special handling was requested)
        if ($lang === null && $tryToKeepStringLength === false && $encoding === 'UTF-8') {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
        }

        // step 3: collapse separators into the delimiter
        if ($hasMbRegex) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[-_\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[-_\s]+/u', $delimiter, $str);
    }
5456
5457
    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * The checks run in this order: binary data (UTF-16 / UTF-32), ASCII, UTF-8,
     * "mb_detect_encoding()" with a fixed candidate list and finally an
     * "iconv()" round-trip over the known encodings.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return false|string
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     */
    public static function str_detect_encoding($str)
    {
        $str = (string) $str;

        //
        // 1.) binary strings: only UTF-16 / UTF-32 are reported, everything else is "not detected"
        //

        if (self::is_binary($str, true) === true) {
            $utf16 = self::is_utf16($str, false);
            if ($utf16 === 1 || $utf16 === 2) {
                return $utf16 === 1 ? 'UTF-16LE' : 'UTF-16BE';
            }

            $utf32 = self::is_utf32($str, false);
            if ($utf32 === 1 || $utf32 === 2) {
                return $utf32 === 1 ? 'UTF-32LE' : 'UTF-32BE';
            }

            return false;
        }

        //
        // 2.) ASCII
        //

        if (self::is_ascii($str) === true) {
            return 'ASCII';
        }

        //
        // 3.) UTF-8
        //

        if (self::is_utf8($str) === true) {
            return 'UTF-8';
        }

        //
        // 4.) "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            $candidates = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($str, $candidates, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) "iconv()": an encoding is accepted when converting the string to itself leaves it unchanged
        //

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $roundTrip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);
            if ($roundTrip === $str) {
                return $candidate;
            }
        }

        return false;
    }
5578
5579
    /**
     * Check if the string ends with the given substring.
     *
     * The comparison is byte-wise and case-sensitive: the last
     * "strlen($needle)" bytes of the haystack are compared with the needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        $needleBytes = \strlen($needle);
        $tail = \substr($haystack, -$needleBytes);

        return $tail === $needle;
    }
5591
5592
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with one of the $substrings
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        // an empty list simply falls through to "false"
        foreach ($substrings as $candidate) {
            $tail = \substr($str, -\strlen($candidate));
            if ($tail === $candidate) {
                return true;
            }
        }

        return false;
    }
5616
5617
    /**
     * Ensures that the string begins with $substring. If it doesn't, it's
     * prepended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        // prepend when there is nothing to look for or the prefix is missing
        if ($substring === '' || \strpos($str, $substring) !== 0) {
            return $substring . $str;
        }

        return $str;
    }
5638
5639
    /**
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        $alreadyPresent = $str !== ''
                          &&
                          $substring !== ''
                          &&
                          \substr($str, -\strlen($substring)) === $substring;

        if ($alreadyPresent) {
            return $str;
        }

        return $str . $substring;
    }
5661
5662
    /**
     * Removes every '_id', turns the remaining underscores into spaces,
     * trims the result and passes it to "ucfirst()".
     *
     * @param string $str
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        // order matters: '_id' has to be removed before the single underscores are replaced
        $withoutId = \str_replace('_id', '', $str);
        $spaced = \str_replace('_', ' ', $withoutId);

        return self::ucfirst(\trim($spaced));
    }
5686
5687
    /**
     * Check if the string ends with the given substring, case insensitive.
     *
     * An empty haystack or an empty needle always yields false.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
5703
5704
    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str ends with one of the $substrings
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        // an empty list simply falls through to "false"
        foreach ($substrings as $candidate) {
            if (self::str_iends_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
5728
5729
    /**
     * Returns the index of the first occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search.
     *
     * This is a wrapper around "stripos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $index = self::stripos($str, $needle, $offset, $encoding);

        return $index;
    }
5755
5756
    /**
     * Returns the index of the last occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. Offsets may be negative to count from the last character
     * in the string.
     *
     * This is a wrapper around "strripos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $index = self::strripos($str, $needle, $offset, $encoding);

        return $index;
    }
5783
5784
    /**
     * Returns the index of the first occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search.
     *
     * This is a wrapper around "strpos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $index = self::strpos($str, $needle, $offset, $encoding);

        return $index;
    }
5810
5811
    /**
     * Returns the index of the last occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. Offsets may be negative to count from the last character
     * in the string.
     *
     * This is a wrapper around "strrpos()".
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|int
     *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
     */
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        $index = self::strrpos($str, $needle, $offset, $encoding);

        return $index;
    }
5838
5839
    /**
     * Inserts $substring into the string at the $index provided.
     *
     * The string is returned unchanged when $index is greater than its length.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding === 'UTF-8') {
            // fast path: use the "mb_" functions directly
            $length = (int) \mb_strlen($str);
            if ($index > $length) {
                return $str;
            }

            $head = (string) \mb_substr($str, 0, $index);
            $tail = (string) \mb_substr($str, $index, $length);

            return $head . $substring . $tail;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $length = (int) self::strlen($str, $encoding);
        if ($index > $length) {
            return $str;
        }

        $head = (string) self::substr($str, 0, $index, $encoding);
        $tail = (string) self::substr($str, $index, $length, $encoding);

        return $head . $substring . $tail;
    }
5878
5879
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * Every search value is turned into a quoted, case-insensitive unicode
     * regex and handed to "preg_replace()".
     *
     * @see  http://php.net/manual/en/function.str-ireplace.php
     *
     * @param mixed $search  <p>
     *                       Every replacement with search array is
     *                       performed on the result of previous replacement.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value(s), passed on to "preg_replace()".
     *                       </p>
     * @param mixed $subject <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] <p>
     *                       The number of matched and replaced needles will
     *                       be returned in count which is passed by
     *                       reference.
     *                       </p>
     *
     * @return mixed a string or an array of replacements
     */
    public static function str_ireplace($search, $replace, $subject, &$count = null)
    {
        $patterns = \array_map(
            static function ($needle): string {
                $needle = (string) $needle;

                // an empty needle gets a pattern that can never match
                if ($needle === '') {
                    return '/^(?<=.)$/';
                }

                return '/' . \preg_quote($needle, '/') . '/ui';
            },
            (array) $search
        );

        return \preg_replace($patterns, $replace, $subject, -1, $count);
    }
5923
5924
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * - the match is case-insensitive and byte-based ("stripos()")
     * - an empty $search appends $replacement to the string
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            return $str . $replacement;
        }

        // nothing can be found in an empty string
        if ($str !== '' && \stripos($str, $search) === 0) {
            return $replacement . \substr($str, \strlen($search));
        }

        return $str;
    }
5955
5956
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * - the match is case-insensitive and byte-based ("stripos()")
     * - an empty $search appends $replacement to the string
     * - a $search that is longer than the string can never match, so the
     *   string is returned unchanged
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string string after the replacements
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        if ($search === '') {
            return $str . $replacement;
        }

        // Byte position where $search would have to start to be the ending of $str.
        // A negative value means $search is longer than $str: skip "stripos()" then, because an
        // offset outside of the haystack triggers a warning (PHP 7) or a ValueError (PHP 8).
        $offset = \strlen($str) - \strlen($search);
        if ($offset < 0) {
            return $str;
        }

        if (\stripos($str, $search, $offset) !== false) {
            return \substr($str, 0, $offset) . $replacement;
        }

        return $str;
    }
5987
5988
    /**
     * Check if the string starts with the given substring, case insensitive.
     *
     * An empty haystack or an empty needle always yields false.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        $position = self::stripos($haystack, $needle);

        return $position === 0;
    }
6004
6005
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @return bool whether or not $str starts with one of the $substrings
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_istarts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
6033
6034
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * - the separator is searched case-insensitive
     * - an empty string is returned if the input or the separator is empty,
     *   or if the separator was not found
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // search with the same encoding that is used for cutting below, so that the offset fits
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6068
6069
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * - the separator is searched case-insensitive
     * - an empty string is returned if the input or the separator is empty,
     *   or if the separator was not found
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // search with the same encoding that is used for cutting below, so that the offset fits
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6103
6104
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * - the separator is searched case-insensitive
     * - an empty string is returned if the input or the separator is empty,
     *   or if the separator was not found
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        // search with the same encoding that is used for cutting below, so that the offset fits
        $offset = self::str_iindex_first($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
6130
6131
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * - the separator is searched case-insensitive
     * - an empty string is returned if the input or the separator is empty,
     *   or if the separator was not found
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        // UTF-8 uses the "mb_" functions directly, every other encoding goes through the class helpers
        $isUtf8 = $encoding === 'UTF-8';

        $position = $isUtf8
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);
        if ($position === false) {
            return '';
        }

        if ($isUtf8) {
            return (string) \mb_substr($str, 0, $position);
        }

        return (string) self::substr($str, 0, $position, $encoding);
    }
6162
6163
    /**
     * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
     *
     * This is a wrapper around "stristr()" that returns an empty string
     * instead of false.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::stristr($str, $needle, $beforeNeedle, $encoding);

        return $found === false ? '' : $found;
    }
6199
6200
    /**
     * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
     *
     * This is a wrapper around "strrichr()" that returns an empty string
     * instead of false.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::strrichr($str, $needle, $beforeNeedle, $encoding);

        return $found === false ? '' : $found;
    }
6231
6232
    /**
     * Returns the last $n characters of the string.
     *
     * An empty string is returned for an empty input or if $n is not positive.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
    {
        if ($n <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $normalized = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $normalized);
        }

        // fast path: use the "mb_" function directly
        return (string) \mb_substr($str, -$n);
    }
6255
6256
    /**
     * Limit the number of characters in a string.
     *
     * A string that is longer than $length is cut and $strAddOn is appended;
     * the add-on counts towards the limit. If the add-on alone is already as
     * long as (or longer than) $length, only the add-on is returned.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $strAddOn = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // never pass a negative length to "mb_substr()": it would cut from the end of the string
            $keep = \max(0, $length - (int) self::strlen($strAddOn));

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $keep) . $strAddOn;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // measure the add-on with the same encoding as the string itself
        $keep = \max(0, $length - (int) self::strlen($strAddOn, $encoding));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $strAddOn;
    }
6293
6294
    /**
     * Limit the number of characters in a string without cutting the last word in half.
     *
     * The string is cut at $length characters and the trailing (possibly partial) word is
     * dropped. If no complete word remains, the first $length - 1 characters are kept instead.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   [optional] <p>Default: 100</p>
     * @param string $strAddOn [optional] <p>Default: …</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The unchanged string if it fits, otherwise the shortened string plus $strAddOn.</p>
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $strAddOn = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($length <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            if ((int) self::strlen($str, $encoding) <= $length) {
                return $str;
            }

            // the limit falls exactly on a space: cut right before it
            if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
                return ((string) self::substr($str, 0, $length - 1, $encoding)) . $strAddOn;
            }

            $head = self::substr($str, 0, $length, $encoding);
            if ($head === false) {
                return $strAddOn;
            }

            // drop the last (possibly truncated) word
            $words = \explode(' ', $head);
            $kept = \implode(' ', \array_slice($words, 0, -1));

            if ($kept === '') {
                return ((string) self::substr($head, 0, $length - 1, $encoding)) . $strAddOn;
            }

            return $kept . $strAddOn;
        }

        /** @noinspection UnnecessaryCastingInspection */
        if ((int) \mb_strlen($str) <= $length) {
            return $str;
        }

        // the limit falls exactly on a space: cut right before it
        if (\mb_substr($str, $length - 1, 1) === ' ') {
            return ((string) \mb_substr($str, 0, $length - 1)) . $strAddOn;
        }

        $head = \mb_substr($str, 0, $length);

        // drop the last (possibly truncated) word
        $words = \explode(' ', $head);
        $kept = \implode(' ', \array_slice($words, 0, -1));

        if ($kept === '') {
            return ((string) \mb_substr($head, 0, $length - 1)) . $strAddOn;
        }

        return $kept . $strAddOn;
    }
6358
6359
    /**
     * Returns the longest common prefix between the string and $otherStr.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shared leading characters, or an empty string if there are none.</p>
     */
    public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        $prefix = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            // the prefix can never be longer than the shorter of both strings
            $limit = (int) \min(
                self::strlen($str, $encoding),
                self::strlen($otherStr, $encoding)
            );

            $pos = 0;
            while ($pos < $limit) {
                $current = self::substr($str, $pos, 1, $encoding);

                if ($current === false || $current !== self::substr($otherStr, $pos, 1, $encoding)) {
                    break;
                }

                $prefix .= $current;
                ++$pos;
            }

            return $prefix;
        }

        // the prefix can never be longer than the shorter of both strings
        $limit = (int) \min(
            \mb_strlen($str),
            \mb_strlen($otherStr)
        );

        $pos = 0;
        while ($pos < $limit) {
            $current = \mb_substr($str, $pos, 1);

            // stop at the first position where the characters differ
            if ($current === false || $current !== \mb_substr($otherStr, $pos, 1)) {
                break;
            }

            $prefix .= $current;
            ++$pos;
        }

        return $prefix;
    }
6417
6418
    /**
     * Returns the longest common substring between the string and $otherStr.
     * In the case of ties, it returns that which occurs first in $str.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The longest common substring, or an empty string if there is none.</p>
     */
    public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $otherStr === '') {
            return '';
        }

        // Uses dynamic programming to solve
        // http://en.wikipedia.org/wiki/Longest_common_substring_problem

        if ($encoding === 'UTF-8') {
            $strLength = (int) \mb_strlen($str);
            $otherLength = (int) \mb_strlen($otherStr);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $strLength = (int) self::strlen($str, $encoding);
            $otherLength = (int) self::strlen($otherStr, $encoding);
        }

        // Return if either string is empty
        if ($strLength === 0 || $otherLength === 0) {
            return '';
        }

        // evaluated after the normalization above, exactly like the original second check
        $isUtf8 = $encoding === 'UTF-8';

        // Split both strings once, instead of re-extracting every character
        // $strLength * $otherLength times inside the nested loop.
        $strChars = [];
        for ($i = 0; $i < $strLength; ++$i) {
            $strChars[] = $isUtf8 ? \mb_substr($str, $i, 1) : self::substr($str, $i, 1, $encoding);
        }

        $otherChars = [];
        for ($j = 0; $j < $otherLength; ++$j) {
            $otherChars[] = $isUtf8 ? \mb_substr($otherStr, $j, 1) : self::substr($otherStr, $j, 1, $encoding);
        }

        $len = 0; // length of the best match found so far
        $end = 0; // position in $str right after the best match

        // Only the previous row of the DP table is ever read, so two rows are enough.
        $previousRow = \array_fill(0, $otherLength + 1, 0);

        for ($i = 1; $i <= $strLength; ++$i) {
            $strChar = $strChars[$i - 1];
            $currentRow = [0];

            for ($j = 1; $j <= $otherLength; ++$j) {
                if ($strChar === $otherChars[$j - 1]) {
                    $run = $previousRow[$j - 1] + 1;

                    // strict ">" keeps the first match in case of ties
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $run = 0;
                }

                $currentRow[$j] = $run;
            }

            $previousRow = $currentRow;
        }

        if ($isUtf8) {
            return (string) \mb_substr($str, $end - $len, $len);
        }

        return (string) self::substr($str, $end - $len, $len, $encoding);
    }
6502
6503
    /**
     * Returns the longest common suffix between the string and $otherStr.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $otherStr <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shared trailing characters, or an empty string if there are none.</p>
     */
    public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
    {
        if ($otherStr === '' || $str === '') {
            return '';
        }

        $suffix = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            // the suffix can never be longer than the shorter of both strings
            $limit = (int) \min(
                self::strlen($str, $encoding),
                self::strlen($otherStr, $encoding)
            );

            $fromEnd = 1;
            while ($fromEnd <= $limit) {
                $current = self::substr($str, -$fromEnd, 1, $encoding);

                if ($current === false || $current !== self::substr($otherStr, -$fromEnd, 1, $encoding)) {
                    break;
                }

                $suffix = $current . $suffix;
                ++$fromEnd;
            }

            return $suffix;
        }

        // the suffix can never be longer than the shorter of both strings
        $limit = (int) \min(
            \mb_strlen($str, $encoding),
            \mb_strlen($otherStr, $encoding)
        );

        // walk backwards from the last character and prepend while both strings agree
        $fromEnd = 1;
        while ($fromEnd <= $limit) {
            $current = \mb_substr($str, -$fromEnd, 1);

            if ($current === false || $current !== \mb_substr($otherStr, -$fromEnd, 1)) {
                break;
            }

            $suffix = $current . $suffix;
            ++$fromEnd;
        }

        return $suffix;
    }
6564
6565
    /**
     * Returns true if $str matches the supplied pattern, false otherwise.
     *
     * The pattern is wrapped in "/" delimiters and evaluated with the "u" (UTF-8) modifier,
     * so it must be given without delimiters and any literal "/" inside it must be escaped.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against (without delimiters).</p>
     *
     * @return bool
     *              <p>Whether or not $str matches the pattern.</p>
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        $regex = '/' . $pattern . '/u';

        // preg_match() yields 1 on a match, 0 on no match and false on error
        return (bool) \preg_match($regex, $str);
    }
6577
6578
    /**
     * Returns whether or not a character exists at an index. Offsets may be
     * negative to count from the last character in the string. Implements
     * part of the ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return bool
     *              <p>Whether or not the index exists.</p>
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $charCount = (int) self::strlen($str, $encoding);

        // negative offsets count from the end: -1 is the last character
        if ($offset < 0) {
            return \abs($offset) <= $charCount;
        }

        return $offset < $charCount;
    }
6600
6601
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string
     *                <p>The character at the specified index.</p>
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Measure in the requested encoding, so that the bounds check agrees with
        // str_offset_exists() and with the char_at() lookup below.
        $length = (int) self::strlen($str, $encoding);

        if (
            ($index >= 0 && $length <= $index)
            ||
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
6630
6631
    /**
     * Pad a UTF-8 string to given length with another string.
     *
     * The string is returned unchanged if $pad_length is 0, if $pad_string is empty
     * or if the string is already longer than $pad_length.
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of "left", "right", "both"
     *
     * @return string
     *                <p>Returns the padded string.</p>
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_string === '' || $pad_length === 0) {
            return $str;
        }

        // translate the string aliases into the native STR_PAD_* constants
        if ($pad_type !== (int) $pad_type) {
            $aliases = [
                'left'  => \STR_PAD_LEFT,
                'right' => \STR_PAD_RIGHT,
                'both'  => \STR_PAD_BOTH,
            ];

            if (!\is_string($pad_type) || !isset($aliases[$pad_type])) {
                throw new \InvalidArgumentException(
                    'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }

            $pad_type = $aliases[$pad_type];
        }

        // from here on $pad_type is always an integer; unknown integers behave like STR_PAD_RIGHT

        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str);

            if ($pad_length < $str_length) {
                return $str;
            }

            $missing = $pad_length - $str_length;

            if ($pad_type === \STR_PAD_BOTH) {
                // an odd remainder goes to the right side
                $leftCount = (int) \floor($missing / 2);
                $rightCount = (int) \ceil($missing / 2);

                $before = (string) \mb_substr(\str_repeat($pad_string, $leftCount), 0, $leftCount);
                $after = (string) \mb_substr(\str_repeat($pad_string, $rightCount), 0, $rightCount);

                return $before . $str . $after;
            }

            // repeat the pad string often enough, then trim it to the exact size
            $padChars = (int) \mb_strlen($pad_string);
            $filler = (string) \mb_substr(
                \str_repeat($pad_string, (int) \ceil($missing / $padChars)),
                0,
                $missing
            );

            if ($pad_type === \STR_PAD_LEFT) {
                return $filler . $str;
            }

            return $str . $filler;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $str_length = (int) self::strlen($str, $encoding);

        if ($pad_length < $str_length) {
            return $str;
        }

        $missing = $pad_length - $str_length;

        if ($pad_type === \STR_PAD_BOTH) {
            // an odd remainder goes to the right side
            $leftCount = (int) \floor($missing / 2);
            $rightCount = (int) \ceil($missing / 2);

            $before = (string) self::substr(\str_repeat($pad_string, $leftCount), 0, $leftCount, $encoding);
            $after = (string) self::substr(\str_repeat($pad_string, $rightCount), 0, $rightCount, $encoding);

            return $before . $str . $after;
        }

        // repeat the pad string often enough, then trim it to the exact size
        $padChars = (int) self::strlen($pad_string, $encoding);
        $filler = (string) self::substr(
            \str_repeat($pad_string, (int) \ceil($missing / $padChars)),
            0,
            $missing,
            $encoding
        );

        if ($pad_type === \STR_PAD_LEFT) {
            return $filler . $str;
        }

        return $str . $filler;
    }
6793
6794
    /**
     * Returns a new string of a given length such that both sides of the
     * string are padded. Shortcut for str_pad() with STR_PAD_BOTH.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>String with padding applied.</p>
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $padStr, \STR_PAD_BOTH, $encoding);

        return $padded;
    }
6813
6814
    /**
     * Returns a new string of a given length such that the beginning of the
     * string is padded. Shortcut for str_pad() with STR_PAD_LEFT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>String with left padding.</p>
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $padStr, \STR_PAD_LEFT, $encoding);

        return $padded;
    }
6833
6834
    /**
     * Returns a new string of a given length such that the end of the string
     * is padded. Shortcut for str_pad() with STR_PAD_RIGHT.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>String with right padding.</p>
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $padStr = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $padStr, \STR_PAD_RIGHT, $encoding);

        return $padded;
    }
6853
6854
    /**
     * Repeat a string.
     *
     * The input is passed through self::filter() before it is repeated.
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of times the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @return string
     *                <p>The repeated string.</p>
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
6878
6879
    /**
     * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param mixed $search  <p>
     *                       The value being searched for, otherwise known as the needle.
     *                       An array may be used to designate multiple needles.
     *                       </p>
     * @param mixed $replace <p>
     *                       The replacement value that replaces found search
     *                       values. An array may be used to designate multiple replacements.
     *                       </p>
     * @param mixed $subject <p>
     *                       The string or array being searched and replaced on,
     *                       otherwise known as the haystack.
     *                       </p>
     *                       <p>
     *                       If subject is an array, then the search and
     *                       replace is performed with every entry of
     *                       subject, and the return value is an array as
     *                       well.
     *                       </p>
     * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles
     *
     * @return mixed
     *               <p>A string or an array with the replaced values.</p>
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        // $count is handed through by reference, so the caller receives the native counter
        /** @psalm-suppress PossiblyNullArgument */
        $result = \str_replace($search, $replace, $subject, $count);

        return $result;
    }
6917
6918
    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * An empty $search never matches a prefix; in that case $replacement is
     * appended to $str. The prefix comparison is done byte-wise.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string
     *                <p>String after the replacement, or the unchanged $str if it does not start with $search.</p>
     */
    public static function str_replace_beginning(string $str, string $search, string $replacement): string
    {
        // covers every empty-$search combination, including an empty $str
        if ($search === '') {
            return $str . $replacement;
        }

        $startsWithSearch = \strpos($str, $search) === 0;
        if (!$startsWithSearch) {
            return $str;
        }

        $remainder = \substr($str, \strlen($search));

        return $replacement . $remainder;
    }
6949
6950
    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * An empty $search never matches a suffix; in that case $replacement is
     * appended to $str. The suffix comparison is done byte-wise.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @return string
     *                <p>String after the replacement, or the unchanged $str if it does not end with $search.</p>
     */
    public static function str_replace_ending(string $str, string $search, string $replacement): string
    {
        // covers every empty-$search combination, including an empty $str
        if ($search === '') {
            return $str . $replacement;
        }

        $searchLength = \strlen($search);

        // A search term longer than the string can never be its suffix. Checking this first
        // avoids handing an out-of-range negative offset to a string function, which
        // raises a warning (PHP 7.1+) or a ValueError (PHP 8).
        if ($searchLength > \strlen($str)) {
            return $str;
        }

        if (\substr($str, -$searchLength) === $search) {
            return \substr($str, 0, -$searchLength) . $replacement;
        }

        return $str;
    }
6981
6982
    /**
     * Replace the first "$search"-term with the "$replace"-term.
     *
     * @param string $search  <p>The string to search for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string being searched and replaced on.</p>
     *
     * @return string
     *                <p>The string after the replacement, or the unchanged $subject if $search was not found.</p>
     */
    public static function str_replace_first(string $search, string $replace, string $subject): string
    {
        $pos = self::strpos($subject, $search);
        if ($pos === false) {
            return $subject;
        }

        $replaced = self::substr_replace($subject, $replace, $pos, (int) self::strlen($search));

        // substr_replace() is typed as string|string[]; narrow the result explicitly so that
        // the declared string return type always holds (an array is not expected for a string subject).
        return \is_string($replaced) ? $replaced : $subject;
    }
7004
7005
    /**
     * Replace the last "$search"-term with the "$replace"-term.
     *
     * @param string $search  <p>The string to search for.</p>
     * @param string $replace <p>The replacement.</p>
     * @param string $subject <p>The string being searched and replaced on.</p>
     *
     * @return string
     *                <p>The string after the replacement, or the unchanged $subject if $search was not found.</p>
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $pos = self::strrpos($subject, $search);
        if ($pos === false) {
            return $subject;
        }

        $replaced = self::substr_replace($subject, $replace, $pos, (int) self::strlen($search));

        // substr_replace() is typed as string|string[]; narrow the result explicitly so that
        // the declared string return type always holds (an array is not expected for a string subject).
        return \is_string($replaced) ? $replaced : $subject;
    }
7029
7030
    /**
     * Shuffles all the characters in the string.
     *
     * PS: uses random algorithm which is weak for cryptography purposes
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shuffled string.</p>
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        // decided before normalization: only the literal 'UTF-8' takes the direct mbstring path
        $useMbstring = $encoding === 'UTF-8';

        if ($useMbstring) {
            $charCount = (int) \mb_strlen($str);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $charCount = (int) self::strlen($str, $encoding);
        }

        // shuffle the character positions, then rebuild the string in that order
        $positions = \range(0, $charCount - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($positions);

        $shuffled = '';

        foreach ($positions as $position) {
            $piece = $useMbstring
                ? \mb_substr($str, $position, 1)
                : self::substr($str, $position, 1, $encoding);

            if ($piece !== false) {
                $shuffled .= $piece;
            }
        }

        return $shuffled;
    }
7076
7077
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. Negative values for $start and $end are computed
     * from the end of the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $start    <p>Initial index from which to begin extraction.</p>
     * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return false|string
     *                      <p>The extracted substring; an empty string if $end does not lie
     *                      behind $start.</p><p>The result of the underlying substr call is
     *                      returned as is, so <b>FALSE</b> is possible where that call reports it.</p>
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        // decided before normalization: only the literal 'UTF-8' takes the direct mbstring path
        $useMbstring = $encoding === 'UTF-8';

        if ($useMbstring) {
            $strLength = (int) \mb_strlen($str);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $strLength = (int) self::strlen($str, $encoding);
        }

        if ($end === null) {
            // no end given: take everything from $start on
            $length = $strLength;
        } else {
            // Resolve both offsets to absolute positions first. Mixing a negative $start
            // with the raw "$end - $start" arithmetic produced a too large length, and an
            // $end in front of $start produced a negative length (which cuts from the end).
            $absoluteStart = $start < 0 ? \max(0, $strLength + $start) : $start;
            $absoluteEnd = $end < 0 ? $strLength + $end : $end;

            if ($absoluteEnd <= $absoluteStart) {
                return '';
            }

            $length = $absoluteEnd - $absoluteStart;
        }

        if ($useMbstring) {
            return \mb_substr($str, $start, $length);
        }

        return self::substr($str, $start, $length, $encoding);
    }
7126
7127
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Whitespace and "-" become "_", every upper-case ASCII letter is lower-cased
     * and prefixed with "_", every digit is wrapped in "_", and runs of "_" are
     * collapsed and trimmed from both ends.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string string in snake_case
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace('-', '_', self::normalize_whitespace($str));

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // no "|" inside the class: there it is a literal pipe, not an alternation
            '/([\dA-Z])/u',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];

                // a digit survives the int round-trip unchanged
                if ((string) (int) $match === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        // Whitespace becomes "_" first, so a separate whitespace trim pattern could never match.
        $str = (string) \preg_replace(
            [
                '/\s+/', // convert spaces to "_"
                '/_+/',  // remove double "_"
            ],
            '_',
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
7191
7192
    /**
     * Orders the characters of a string by their code points.
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>If <strong>true</strong>, every character is kept only once.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, the characters are sorted in descending code point order.</p>
     *
     * @return string string of sorted characters
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $codePoints = self::codepoints($str);

        if ($unique) {
            // flipping twice leaves one entry per distinct value
            $codePoints = \array_flip(\array_flip($codePoints));
        }

        if (!$desc) {
            \asort($codePoints);
        } else {
            \arsort($codePoints);
        }

        return self::string($codePoints);
    }
7217
7218
    /**
     * Alias of "UTF8::str_split()": all arguments are forwarded unchanged.
     *
     * @see UTF8::str_split()
     *
     * @param string|string[] $str
     * @param int             $length
     * @param bool            $cleanUtf8
     *
     * @return string[]
     */
    public static function split($str, int $length = 1, bool $cleanUtf8 = false): array
    {
        return self::str_split($str, $length, $cleanUtf8);
    }
7236
7237
    /**
     * Splits the string with the provided regular expression, returning an
     * array of strings. An optional integer $limit will truncate the
     * results.
     *
     * @param string $str
     * @param string $pattern <p>The regex (without delimiters) with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @return string[] an array of strings; empty if $limit is 0 or the split itself fails
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $parts = \mb_split($pattern, $str);
        } else {
            // The pattern is a regex, so it must not be preg_quote()'d. "\x01" serves as the delimiter
            // because it practically never occurs in a pattern, so nothing needs escaping.
            $parts = \preg_split("\x01" . $pattern . "\x01u", $str);
        }

        // both split functions signal failure (e.g. an invalid pattern) with false
        if ($parts === false) {
            return [];
        }

        // a positive limit truncates: only the first $limit pieces are kept
        return $limit > 0 ? \array_slice($parts, 0, $limit) : $parts;
    }
7291
7292
    /**
     * Check if the string starts with the given substring.
     *
     * The comparison is byte-wise and case-sensitive. An empty $needle is
     * treated as a prefix of every string (same rule as the native PHP 8
     * function of the same name).
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        // only the first strlen($needle) bytes are compared; the rest of the haystack is never scanned
        return \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
7304
7305
    /**
     * Tells whether the string begins with at least one of the given substrings.
     *
     * - case-sensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Candidate prefixes.</p>
     *
     * @return bool true as soon as one candidate matches; false otherwise, and always for an empty
     *              $str or an empty candidate list
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_starts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
7333
7334
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string the text behind the separator; an empty string if $str or $separator is empty
     *                or the separator does not occur
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // Use the class wrapper (not native mb_substr) so the non-UTF-8 path is handled the same way
        // as in the sibling "after last" / "before first" / "before last" separator methods.
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
7373
7374
    /**
     * Returns everything that follows the last occurrence of the separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string empty if $str or $separator is empty or the separator is not found
     */
    public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $isUtf8 = $encoding === 'UTF-8';

        $position = $isUtf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);
        if ($position === false) {
            return '';
        }

        if ($isUtf8) {
            return (string) \mb_substr($str, $position + (int) \mb_strlen($separator));
        }

        return (string) self::substr(
            $str,
            $position + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
7413
7414
    /**
     * Returns everything that precedes the first occurrence of the separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string empty if $str or $separator is empty or the separator is not found
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        $isUtf8 = $encoding === 'UTF-8';

        $position = $isUtf8
            ? \mb_strpos($str, $separator)
            : self::strpos($str, $separator, 0, $encoding);
        if ($position === false) {
            return '';
        }

        if ($isUtf8) {
            return (string) \mb_substr($str, 0, $position);
        }

        return (string) self::substr($str, 0, $position, $encoding);
    }
7457
7458
    /**
     * Returns everything that precedes the last occurrence of the separator.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string empty if $str or $separator is empty or the separator is not found
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        $isUtf8 = $encoding === 'UTF-8';

        $position = $isUtf8
            ? \mb_strrpos($str, $separator)
            : self::strrpos($str, $separator, 0, $encoding);
        if ($position === false) {
            return '';
        }

        if ($isUtf8) {
            return (string) \mb_substr($str, 0, $position);
        }

        // the encoding name is normalized only for the final extraction
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr($str, 0, $position, $encoding);
    }
7500
7501
    /**
     * Returns the part of the string starting at the first occurrence of "$needle"
     * (needle included), or the part in front of it when "$beforeNeedle" is true.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string empty if $str or $needle is empty or the needle is not found
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // the flag is passed straight through; false is the native default anyway
        $found = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $beforeNeedle)
            : self::strstr($str, $needle, $beforeNeedle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
7545
7546
    /**
     * Returns the part of the string starting at the last occurrence of "$needle"
     * (needle included), or the part in front of it when "$beforeNeedle" is true.
     *
     * @param string $str          <p>The input string.</p>
     * @param string $needle       <p>The string to look for.</p>
     * @param bool   $beforeNeedle [optional] <p>Default: false</p>
     * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string empty if $str or $needle is empty or the needle is not found
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $beforeNeedle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // the flag is passed straight through; false is the native default anyway
        $found = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $beforeNeedle)
            : self::strrchr($str, $needle, $beforeNeedle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }
7590
7591
    /**
     * Wraps $str in the given substring.
     *
     * @param string $str
     * @param string $substring <p>The text placed in front of and behind $str.</p>
     *
     * @return string $substring, then $str, then $substring again
     */
    public static function str_surround(string $str, string $substring): string
    {
        return "{$substring}{$str}{$substring}";
    }
7603
7604
    /**
     * Capitalizes the first letter of every whitespace-separated word and lower-cases
     * the rest of it. Words listed in $ignore are left exactly as they are.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                [optional] <p>An array of words not to capitalize or null.
     *                                                   Default: null</p>
     * @param string              $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null         $lang                  [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool                $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     * @param bool                $useTrimFirst          [optional] <p>true === trim the input string, first</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false,
        bool $useTrimFirst = true
    ): string {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($useTrimFirst === true) {
            $str = \trim($str);
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // without language or length constraints the native mbstring functions are used
        $plainMb = $lang === null && $tryToKeepStringLength === false;

        /**
         * @param string[] $match
         *
         * @return string
         */
        $titleizeWord = static function (array $match) use ($tryToKeepStringLength, $lang, $ignore, $plainMb, $encoding): string {
            $word = $match[0];

            if ($ignore !== null && \in_array($word, $ignore, true)) {
                return $word;
            }

            if ($plainMb !== true) {
                $lowered = self::strtolower($word, $encoding, false, $lang, $tryToKeepStringLength);

                return self::ucfirst($lowered, $encoding, false, $lang, $tryToKeepStringLength);
            }

            if ($encoding === 'UTF-8') {
                $head = \mb_strtoupper(\mb_substr($word, 0, 1));
                $tail = \mb_strtolower(\mb_substr($word, 1));

                return $head . $tail;
            }

            $head = \mb_strtoupper(\mb_substr($word, 0, 1, $encoding), $encoding);
            $tail = \mb_strtolower(\mb_substr($word, 1, null, $encoding), $encoding);

            return $head . $tail;
        };

        return (string) \preg_replace_callback('/([\S]+)/u', $titleizeWord, $str);
    }
7679
7680
    /**
     * Returns a trimmed string in proper title case.
     *
     * Small words (a, an, and, of, the, ...) stay lower-case unless they start or
     * end the title, a sub-sentence or an inserted subphrase, or are part of a
     * hyphenated compound. URLs, domains, e-mail addresses, file paths and words
     * with internal capitals are left untouched. The entries of $ignore are added
     * to the list of small words.
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the titleized string
     */
    public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
    {
        // NOTE(review): the $ignore entries are inserted into the patterns below as-is (no preg_quote),
        // so regex meta characters in them are interpreted as regex syntax - confirm this is intended.
        $smallWords = \array_merge(
            [
                '(?<!q&)a',
                'an',
                'and',
                'as',
                'at(?!&t)',
                'but',
                'by',
                'en',
                'for',
                'if',
                'in',
                'of',
                'on',
                'or',
                'the',
                'to',
                'v[.]?',
                'via',
                'vs[.]?',
            ],
            $ignore
        );

        $smallWordsRx = \implode('|', $smallWords);
        $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

        /**
         * Upper-cases the first character of capture group 1.
         *
         * @param string[] $matches
         *
         * @return string
         */
        $upperFirstGroup = static function (array $matches) use ($encoding): string {
            return static::str_upper_first($matches[1], $encoding);
        };

        /**
         * Keeps capture group 1 and upper-cases the first character of capture group 2.
         *
         * @param string[] $matches
         *
         * @return string
         */
        $upperSecondGroup = static function (array $matches) use ($encoding): string {
            return $matches[1] . static::str_upper_first($matches[2], $encoding);
        };

        $str = \trim($str);

        // a string without any lower-case letter is lower-cased as a whole first
        if (self::has_lowercase($str) === false) {
            $str = self::strtolower($str, $encoding);
        }

        // the main substitutions
        $str = (string) \preg_replace_callback(
            '~\b (_*) (?:                                                              # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                        |
                        ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                      ) (_*) \b                                                           # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                // groups 2-5 are alternatives; the first non-empty one decides how the word is treated
                if ($matches[2]) {
                    // preserve URLs, domains, emails and file paths
                    $word = $matches[2];
                } elseif ($matches[3]) {
                    // lower-case small words
                    $word = self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // capitalize word w/o internal caps
                    $word = static::str_upper_first($matches[4], $encoding);
                } else {
                    // preserve other kinds of word (iPhone)
                    $word = $matches[5];
                }

                // leading and trailing underscores are kept around the word
                return $matches[1] . $word . $matches[6];
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \A [[:punct:]]*                # start of title...
                      |  [:.;?!][ ]+               # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )       # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \b # ...followed by small word
                     ~uxi',
            $upperSecondGroup,
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi',
            $upperFirstGroup,
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            $upperFirstGroup,
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        // NOTE(review): the lookbehind below tests for "…" although its inline comment talks about a
        // hyphen - left untouched here, confirm which one is intended.
        $str = (string) \preg_replace_callback(
            '~\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi',
            $upperSecondGroup,
            $str
        );

        return $str;
    }
7849
7850
    /**
     * Get a binary representation of a specific string.
     *
     * The bytes of $str are read as one big-endian number and written in base 2
     * without leading zeros ("0" for an empty or all-zero input).
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string a string of "0" and "1" characters
     */
    public static function str_to_binary(string $str): string
    {
        // Converted byte by byte: \base_convert() works on floats internally and silently
        // loses precision once the input is longer than a few bytes.
        $bits = '';
        $byteCount = \strlen($str);
        for ($i = 0; $i < $byteCount; ++$i) {
            $bits .= \sprintf('%08b', \ord($str[$i]));
        }

        // same shape as the base_convert() output: no leading zeros, but never empty
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
7863
7864
    /**
     * Split a string into an array of lines.
     *
     * A line ends at "\r\n", "\r" or "\n"; every line break ends exactly one
     * line, so blank lines show up as empty strings (unless they are removed
     * via $removeEmptyValues / $removeShortValues).
     *
     * @param string   $str
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
    {
        if ($str === '') {
            return $removeEmptyValues === true ? [] : [''];
        }

        // "\r\n" must come first in the alternation so that it counts as ONE line break;
        // a character class with a {1,2} quantifier would also swallow "\n\n" and lose a blank line.
        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $return = \mb_split('\r\n|\r|\n', $str);
        } else {
            $return = \preg_split('/\r\n|\r|\n/u', $str);
        }

        if ($return === false) {
            return $removeEmptyValues === true ? [] : [''];
        }

        if (
            $removeShortValues === null
            &&
            $removeEmptyValues === false
        ) {
            return $return;
        }

        return self::reduce_string_array(
            $return,
            $removeEmptyValues,
            $removeShortValues
        );
    }
7902
7903
    /**
     * Splits a string into words and the text between them.
     *
     * The string is split with PREG_SPLIT_DELIM_CAPTURE, so the result contains
     * the matched words as well as the pieces in between.
     *
     * @param string   $str
     * @param string   $charList          <p>Additional chars for the definition of "words".</p>
     * @param bool     $removeEmptyValues <p>Remove empty values.</p>
     * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $charList = '',
        bool $removeEmptyValues = false,
        int $removeShortValues = null
    ): array {
        // fallback for "nothing to split" and for a failed split
        $emptyResult = $removeEmptyValues === true ? [] : [''];

        if ($str === '') {
            return $emptyResult;
        }

        $charList = self::rxClass($charList, '\pL');
        $wordPattern = "/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u";

        $parts = \preg_split($wordPattern, $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $emptyResult;
        }

        // no filtering requested: hand back the raw split result
        if ($removeEmptyValues === false && $removeShortValues === null) {
            return $parts;
        }

        $reduced = self::reduce_string_array($parts, $removeEmptyValues, $removeShortValues);

        // every remaining entry is cast to string, the keys stay as they are
        return \array_map(
            static function ($item): string {
                return (string) $item;
            },
            $reduced
        );
    }
7950
7951
    /**
     * Alias for "UTF8::to_ascii()": all arguments are passed through unchanged.
     *
     * @see UTF8::to_ascii()
     *
     * @param string $str
     * @param string $unknown
     * @param bool   $strict
     *
     * @return string
     */
    public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
    {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
7966
7967
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * If the substring alone is already as long as (or longer than) the desired
     * length, nothing of the original string is kept and only the substring
     * is returned.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // the string already fits: nothing to truncate, nothing to append
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                // Reserve room for the substring, but never let the kept length
                // drop below zero: a negative length would make "mb_substr()"
                // cut from the END of the string and return far too much text.
                $length = \max(0, $length - (int) \mb_strlen($substring));
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length) . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            // same clamping as in the UTF-8 branch above
            $length = \max(0, $length - (int) self::strlen($substring, $encoding));
        }

        return ((string) self::substr($str, 0, $length, $encoding)) . $substring;
    }
8024
8025
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * If not even the first word fits, only the substring is returned, unless
     * $ignoreDoNotSplitWordsForOneWord is true: then that word is cut.
     *
     * @param string $str
     * @param int    $length                          <p>Desired length of the truncated string.</p>
     * @param string $substring                       [optional] <p>The substring to append if it can fit. Default:
     *                                                ''</p>
     * @param string $encoding                        [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignoreDoNotSplitWordsForOneWord [optional] <p>Default: false</p>
     *
     * @return string string after truncating
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignoreDoNotSplitWordsForOneWord = false
    ): string {
        if ($str === '' || $length <= 0) {
            return $substring;
        }

        // The plain "mb_*" functions are only used when UTF-8 was requested
        // literally; every other encoding name goes through the wrappers.
        $isUtf8 = $encoding === 'UTF-8';
        if (!$isUtf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $strLength = $isUtf8 ? (int) \mb_strlen($str) : (int) self::strlen($str, $encoding);
        if ($length >= $strLength) {
            return $str;
        }

        // need to further trim the string so we can append the substring
        $length -= $isUtf8 ? (int) \mb_strlen($substring) : (int) self::strlen($substring, $encoding);
        if ($length <= 0) {
            return $substring;
        }

        $truncated = $isUtf8 ? \mb_substr($str, 0, $length) : self::substr($str, 0, $length, $encoding);
        if ($truncated === false) {
            return '';
        }

        // a space directly behind the cut means no word was split
        $strPosSpace = $isUtf8
            ? \mb_strpos($str, ' ', $length - 1)
            : self::strpos($str, ' ', $length - 1, $encoding);
        if ($strPosSpace === $length) {
            return $truncated . $substring;
        }

        // the last word was truncated: find the last space inside the kept part
        $lastPos = $isUtf8
            ? \mb_strrpos($truncated, ' ', 0)
            : self::strrpos($truncated, ' ', 0, $encoding);

        $cutAtLastSpace = $lastPos !== false
                          ||
                          ($strPosSpace !== false && $ignoreDoNotSplitWordsForOneWord === false);

        if ($cutAtLastSpace) {
            // "(int) false" is 0, so without any space the kept part becomes empty
            $truncated = $isUtf8
                ? (string) \mb_substr($truncated, 0, (int) $lastPos)
                : (string) self::substr($truncated, 0, (int) $lastPos, $encoding);
        }

        return $truncated . $substring;
    }
8119
8120
    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * This is "UTF8::str_delimit()" with "_" as delimiter.
     *
     * @param string $str
     *
     * @return string the underscored string
     */
    public static function str_underscored(string $str): string
    {
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
8134
8135
    /**
     * Returns an UpperCamelCase version of the supplied string. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * The string is camelized first, afterwards its first character is upper-cased.
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string in UpperCamelCase
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst($camelized, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
    }
8157
8158
    /**
     * Alias for "UTF8::ucfirst()": all arguments are passed through unchanged.
     *
     * @see UTF8::ucfirst()
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $cleanUtf8
     * @param string|null $lang
     * @param bool        $tryToKeepStringLength
     *
     * @return string
     */
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        $result = self::ucfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

        return $result;
    }
8180
8181
    /**
     * Counts number of words in the UTF-8 string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $format   [optional] <p>
     *                         <strong>0</strong> => return a number of words (default)<br>
     *                         <strong>1</strong> => return an array of words<br>
     *                         <strong>2</strong> => return an array of words with word-offset as key
     *                         </p>
     *                         <p>Every other value is handled like <strong>0</strong>.</p>
     * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
     *
     * @return int|string[] The number of words in the string
     */
    public static function str_word_count(string $str, int $format = 0, string $charlist = '')
    {
        // "str_to_words()" keeps the words as delimiters: even indexes hold the
        // text between the words, odd indexes hold the words themselves.
        $strParts = self::str_to_words($str, $charlist);
        $len = \count($strParts);

        if ($format !== 1 && $format !== 2) {
            return (int) (($len - 1) / 2);
        }

        $words = [];

        if ($format === 1) {
            for ($i = 1; $i < $len; $i += 2) {
                $words[] = $strParts[$i];
            }

            return $words;
        }

        // $format === 2: the key is the character offset of the word within $str
        $offset = (int) self::strlen($strParts[0]);
        for ($i = 1; $i < $len; $i += 2) {
            $words[$offset] = $strParts[$i];
            // skip the word itself plus the text that follows it
            $offset += (int) self::strlen($strParts[$i]) + (int) self::strlen($strParts[$i + 1]);
        }

        return $words;
    }
8218
8219
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp(): both strings are
     *       case-folded first and compared afterwards.
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }
8240
8241
    /**
     * Alias for "UTF8::strstr()": all arguments are passed through unchanged.
     *
     * @see UTF8::strstr()
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $cleanUtf8
     *
     * @return false|string
     */
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

        return $result;
    }
8263
8264
    /**
     * Case-sensitive string comparison.
     *
     * Byte-identical strings are equal right away; otherwise both strings are
     * compared in their NFD-normalized form, so canonically equivalent strings
     * are equal, too. A string that cannot be normalized (e.g. invalid UTF-8)
     * is compared as it is.
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // "Normalizer::normalize()" returns false on failure. Passing that on to
        // "\strcmp()" is a type error under strict types, and in weak mode two
        // different invalid strings would wrongly compare as equal.
        return \strcmp(
            \is_string($normalized1) ? $normalized1 : $str1,
            \is_string($normalized2) ? $normalized2 : $str2
        );
    }
8282
8283
    /**
     * Find length of initial segment not matching mask.
     *
     * @param string   $str
     * @param string   $charList <p>The chars that end the segment; an empty list returns the full length.</p>
     * @param int|null $offset   [optional] <p>Start position of the examined part of $str.</p>
     * @param int|null $length   [optional] <p>Length of the examined part of $str.</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     */
    public static function strcspn(
        string $str,
        string $charList,
        int $offset = null,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($charList === '') {
            return (int) self::strlen($str, $encoding);
        }

        // only examine the requested part of the string
        $hasRange = $offset !== null || $length !== null;
        if ($hasRange) {
            if ($encoding !== 'UTF-8') {
                /** @noinspection UnnecessaryCastingInspection */
                $slice = self::substr($str, (int) $offset, $length, $encoding);
            } elseif ($length === null) {
                /** @noinspection UnnecessaryCastingInspection */
                $slice = \mb_substr($str, (int) $offset);
            } else {
                /** @noinspection UnnecessaryCastingInspection */
                $slice = \mb_substr($str, (int) $offset, $length);
            }

            if ($slice === false) {
                return 0;
            }

            $str = $slice;
        }

        if ($str === '') {
            return 0;
        }

        // lazily capture everything in front of the first char from the list
        $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
        $found = [];
        if (!\preg_match($pattern, $str, $found)) {
            // no char of the list occurs: the whole string is the segment
            return (int) self::strlen($str, $encoding);
        }

        $prefixLength = self::strlen($found[1], $encoding);

        return $prefixLength === false ? 0 : $prefixLength;
    }
8344
8345
    /**
     * Alias for "UTF8::stristr()": all arguments are passed through unchanged.
     *
     * @see UTF8::stristr()
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $cleanUtf8
     *
     * @return false|string
     */
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

        return $result;
    }
8367
8368
    /**
     * Create a UTF-8 string from code points.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * Every entry is converted via "UTF8::chr()", the results are concatenated
     * in array order.
     *
     * @param array $array <p>Integer or Hexadecimal codepoints.</p>
     *
     * @return string UTF-8 encoded string
     */
    public static function string(array $array): string
    {
        $chars = \array_map([self::class, 'chr'], $array);

        return \implode('', $chars);
    }
8390
8391
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * The string is checked against every key of the internal BOM list.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return bool
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     */
    public static function string_has_bom(string $str): bool
    {
        // only the keys (the BOM byte sequences) are needed here
        foreach (\array_keys(self::$BOM) as $bomString) {
            if (\strpos($str, $bomString) === 0) {
                return true;
            }
        }

        return false;
    }
8411
8412
    /**
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string      $str            <p>
     *                                    The input string.
     *                                    </p>
     * @param string|null $allowable_tags [optional] <p>
     *                                    You can use the optional second parameter to specify tags which should
     *                                    not be stripped.
     *                                    </p>
     *                                    <p>
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
     *                                    can not be changed with allowable_tags.
     *                                    </p>
     * @param bool        $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string the stripped string
     */
    public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        // the cleaning (if requested) happens before the tags are stripped
        $input = $cleanUtf8 === true ? self::clean($str) : $str;

        return $allowable_tags === null
            ? \strip_tags($input)
            : \strip_tags($input, $allowable_tags);
    }
8448
8449
    /**
     * Strip all whitespace characters. This includes tabs and newline
     * characters, as well as multibyte whitespace such as the thin space
     * and ideographic space.
     *
     * @param string $str
     *
     * @return string
     */
    public static function strip_whitespace(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

        // a failed replacement (null) ends up as an empty string
        return (string) $stripped;
    }
8466
8467
    /**
     * Finds position of first occurrence of a string within another, case insensitive.
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle    <p>The string to find in haystack.</p>
     * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stripos($haystack, $needle, $offset);
            }

            return \mb_stripos($haystack, $needle, $offset, self::normalize_encoding($encoding, 'UTF-8'));
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        //
        // fallback via intl
        //

        $canUseIntl = $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
                      &&
                      $offset >= 0 // grapheme_stripos() can't handle negative offset
                      &&
                      self::$SUPPORT['intl'] === true;

        if ($canUseIntl) {
            $graphemePos = \grapheme_stripos($haystack, $needle, $offset);
            if ($graphemePos !== false) {
                return $graphemePos;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both strings and search case-sensitive
        //

        $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $foldedNeedle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($foldedHaystack, $foldedNeedle, $offset, $encoding);
    }
8542
8543
    /**
     * Returns all of haystack starting from and including the first occurrence of needle to the end.
     *
     * The search is case-insensitive.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, it returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string a sub-string,<br>or <strong>false</strong> if needle is not found
     */
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // The needle can only be empty here if the cleaning removed everything.
        // INFO: a strict comparison is required, "!$needle" is also true for
        //       the valid needle "0" and returned the whole haystack for it.
        if ($needle === '') {
            return $haystack;
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php: lazily capture everything in front of the needle
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // the result starts where the captured prefix ends
        return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
    }
8626
8627
    /**
     * Get the string length, not the byte-length!
     *
     * The first usable strategy wins: mbstring, plain byte count (CP850 / ASCII),
     * iconv, intl, byte count for pure ASCII input and finally a regex.
     *
     * @see     http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str       <p>The string being checked for length.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
     *                   $encoding.
     *                   (One multi-byte character counted as +1).
     *                   <br>
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
     *                   chars.
     */
    public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
    {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strlen($str)
                : \mb_strlen($str, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strlen($str);
        }

        $encodingNotSupported = $encoding !== 'UTF-8'
                                &&
                                self::$SUPPORT['mbstring'] === false
                                &&
                                self::$SUPPORT['iconv'] === false;

        if ($encodingNotSupported) {
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconvLength = \iconv_strlen($str, $encoding);
            if ($iconvLength !== false) {
                return $iconvLength;
            }
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strlen()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $graphemeLength = \grapheme_strlen($str);
            if ($graphemeLength !== null) {
                return $graphemeLength;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php: count the matches of "any single char"
        //

        \preg_match_all('/./us', $str, $parts);
        $charCount = \count($parts[0]);

        // the input is not empty here, so zero matches means the counting failed
        return $charCount === 0 ? false : $charCount;
    }
8741
8742
    /**
     * Get string length in byte.
     *
     * @param string $str
     *
     * @return int
     */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // "mb_" is available if overload is used, so use it ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850') // 8-BIT
            : \strlen($str);
    }
8762
8763
    /**
     * Case-insensitive "natural order" comparison of two strings.
     *
     * INFO: natural order version of UTF8::strcasecmp()
     *
     * Both strings are case-folded via UTF8::strtocasefold() and the results are
     * handed to UTF8::strnatcmp().
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
8784
8785
    /**
     * Compare two strings using a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcmp()
     *
     * @see  http://php.net/manual/en/function.strnatcmp.php
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strnatcmp(string $str1, string $str2): int
    {
        // identical strings need no folding at all
        if ($str1 === $str2) {
            return 0;
        }

        $left = (string) self::strtonatfold($str1);
        $right = (string) self::strtonatfold($str2);

        return \strnatcmp($left, $right);
    }
8804
8805
    /**
     * Case-insensitive string comparison of the first n characters.
     *
     * Both strings are case-folded via UTF8::strtocasefold() and then compared by
     * UTF8::strncmp(), using the same encoding for both steps.
     *
     * @see  http://php.net/manual/en/function.strncasecmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        // Forward the encoding: otherwise strncmp() would always cut the first
        // $len characters as UTF-8, regardless of the encoding used for folding.
        return self::strncmp($folded1, $folded2, $len, $encoding);
    }
8832
8833
    /**
     * Compare the first n characters of two strings.
     *
     * Both strings are cut down to their first $len characters and the two
     * prefixes are then compared via UTF8::strcmp().
     *
     * @see  http://php.net/manual/en/function.strncmp.php
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        $isUtf8 = $encoding === 'UTF-8';

        // only unknown spellings need to be normalized
        if (!$isUtf8 && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $isUtf8 = $encoding === 'UTF-8';
        }

        if ($isUtf8) {
            return self::strcmp(
                (string) \mb_substr($str1, 0, $len),
                (string) \mb_substr($str2, 0, $len)
            );
        }

        return self::strcmp(
            (string) self::substr($str1, 0, $len, $encoding),
            (string) self::substr($str2, 0, $len, $encoding)
        );
    }
8868
8869
    /**
     * Search a string for any character out of a set of characters.
     *
     * @see  http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>This parameter is case sensitive.</p>
     *
     * @return false|string string starting from the character found, or false if it is not found
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($char_list === '' || $haystack === '') {
            return false;
        }

        // build a regex from the char-list and look for the first hit
        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $matches)) {
            return false;
        }

        // cut the haystack at the first occurrence of the matched text
        $start = (int) \strpos($haystack, $matches[0]);

        return \substr($haystack, $start);
    }
8891
8892
    /**
     * Find position of first occurrence of string in a string.
     *
     * The lookup is delegated to the first usable backend, in this order:
     * mbstring, plain strpos() (for "CP850" / "ASCII"), intl, iconv,
     * plain strpos() (if haystack and needle are ASCII only) and finally
     * a vanilla php implementation.
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string.<br> If needle is not found it returns false.
     */
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strpos($haystack, $needle, $offset);
            }

            return \mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $returnTmp = \iconv_strpos($haystack, $needle, $offset, $encoding);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // A negative offset counts from the end of the haystack: translate it into
        // the matching absolute character offset first, so that the position returned
        // below is relative to the start of the haystack and not to the cut-out tail.
        if ($offset < 0) {
            $offset += (int) self::strlen($haystack, $encoding);
            if ($offset < 0) {
                // the offset points before the start of the string: search everything
                $offset = 0;
            }
        }

        $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystackTmp === false) {
            $haystackTmp = '';
        }
        $haystack = (string) $haystackTmp;

        // byte position of the needle within the remaining haystack
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        if ($pos === 0) {
            return $offset;
        }

        // convert the byte position into a character position
        return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
    }
9039
9040
    /**
     * Find the byte position of the first occurrence of a string in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @return false|int The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found, or if haystack or needle
     *                   is empty, it returns false.
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-bit encoding ...
        $useMbstring = self::$SUPPORT['mbstring_func_overload'] === true;

        return $useMbstring
            ? \mb_strpos($haystack, $needle, $offset, 'CP850')
            : \strpos($haystack, $needle, $offset);
    }
9069
9070
    /**
     * Finds the last occurrence of a character in a string within another.
     *
     * With mbstring the call is passed on to mb_strrchr(). Without it, only the
     * first character of the needle is searched for.
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or false if needle is not found
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        $isSingleByteEncoding = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($isSingleByteEncoding && $before_needle === false) {
            return \strrchr($haystack, $needle);
        }

        if (
            self::$SUPPORT['mbstring'] === false
            &&
            $encoding !== 'UTF-8'
        ) {
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv || vanilla php
        //

        // only the first character of the needle is used from here on
        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        // locate the last occurrence: via iconv if available, otherwise via UTF8::strrpos()
        $pos = self::$SUPPORT['iconv'] === true
            ? \iconv_strrpos($haystack, $needle, $encoding)
            : self::strrpos($haystack, $needle, 0, $encoding);
        if ($pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $pos, $encoding)
            : self::substr($haystack, $pos, null, $encoding);
    }
9192
9193
    /**
     * Reverses characters order in the string.
     *
     * The string is passed through UTF8::emoji_encode() before and through
     * UTF8::emoji_decode() after the reversal.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string the string with characters in the reverse sequence
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::emoji_encode($str, true);
        $result = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            for ($index = (int) self::strlen($str, $encoding) - 1; $index >= 0; --$index) {
                $char = self::substr($str, $index, 1, $encoding);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            for ($index = (int) \grapheme_strlen($str) - 1; $index >= 0; --$index) {
                $char = \grapheme_substr($str, $index, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } else {
            for ($index = (int) \mb_strlen($str) - 1; $index >= 0; --$index) {
                $char = \mb_substr($str, $index, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        }

        return self::emoji_decode($result, true);
    }
9245
9246
    /**
     * Finds the last occurrence of a character in a string within another, case insensitive.
     *
     * With mbstring the call is passed on to mb_strrichr(). Without it, only the
     * first character of the needle is searched for, via UTF8::strripos().
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string the portion of haystack or<br>false if needle is not found
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // only the first character of the needle is used from here on
        $firstChar = self::substr($needle, 0, 1, $encoding);
        if ($firstChar === false) {
            return false;
        }
        $needle = (string) $firstChar;

        $pos = self::strripos($haystack, $needle, 0, $encoding);
        if ($pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $pos, $encoding)
            : self::substr($haystack, $pos, null, $encoding);
    }
9321
9322
    /**
     * Find position of last occurrence of a case-insensitive string.
     *
     * The lookup is delegated to the first usable backend, in this order:
     * mbstring, plain strripos() (for "CP850" / "ASCII"), intl,
     * plain strripos() (if haystack and needle are ASCII only) and finally
     * UTF8::strrpos() on the case-folded strings.
     *
     * @param string     $haystack  <p>The string to look in.</p>
     * @param int|string $needle    <p>The string to look for.</p>
     * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $isUtf8 = $encoding === 'UTF-8';

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $isUtf8
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strripos($haystack, $needle, $offset);
        }

        if (!$isUtf8 && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // INFO: "grapheme_strripos()" can't handle other encodings or a negative offset
        if ($isUtf8 && $offset >= 0 && self::$SUPPORT['intl'] === true) {
            $graphemePos = \grapheme_strripos($haystack, $needle, $offset);
            if ($graphemePos !== false) {
                return $graphemePos;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // case-fold both strings, then do a case-sensitive search
        $haystack = self::strtocasefold($haystack, true, false, $encoding);
        $needle = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($haystack, $needle, $offset, $encoding, $cleanUtf8);
    }
9432
9433
    /**
     * Find the byte position of the last occurrence of a string within another, case insensitive.
     *
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @return false|int return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found or if haystack
     *                   or needle is empty
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-bit encoding ...
        $useMbstring = self::$SUPPORT['mbstring_func_overload'] === true;

        return $useMbstring
            ? \mb_strripos($haystack, $needle, $offset, 'CP850')
            : \strripos($haystack, $needle, $offset);
    }
9464
9465
    /**
     * Find position of last occurrence of a string in a string.
     *
     * The lookup is delegated to the first usable backend, in this order:
     * mbstring, plain strrpos() (for "CP850" / "ASCII"), intl,
     * plain strrpos() (if haystack and needle are ASCII only) and finally
     * a vanilla php implementation.
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                              into the string. Negative values will stop searching at an arbitrary point prior to
     *                              the end of the string.
     *                              </p>
     * @param string     $encoding  [optional] <p>Set the charset.</p>
     * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strrpos($haystack, $needle, $offset);
            }

            return \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strrpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $offset >= 0 // grapheme_strrpos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
            if ($returnTmp !== false) {
                return $returnTmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Cut the haystack down to the part that has to be searched. The encoding is
        // passed on to substr() / strlen(), so that offsets and the returned position
        // are counted in characters of the requested encoding.
        $haystackTmp = null;
        if ($offset > 0) {
            // skip the first $offset characters
            $haystackTmp = self::substr($haystack, $offset, null, $encoding);
        } elseif ($offset < 0) {
            // ignore the last |$offset| characters
            $haystackTmp = self::substr($haystack, 0, $offset, $encoding);
            $offset = 0;
        }

        if ($haystackTmp !== null) {
            if ($haystackTmp === false) {
                $haystackTmp = '';
            }
            $haystack = (string) $haystackTmp;
        }

        // byte position of the last occurrence within the remaining haystack
        $pos = \strrpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        $strTmp = \substr($haystack, 0, $pos);
        if ($strTmp === false) {
            return false;
        }

        // convert the byte position into a character position
        return $offset + (int) self::strlen($strTmp, $encoding);
    }
9602
9603
    /**
     * Byte-wise lookup of the last occurrence of a string inside another string.
     *
     * @param string $haystack <p>The string that is searched.</p>
     * @param string $needle   <p>The string to look for in the haystack.</p>
     * @param int    $offset   [optional] <p>
     *                         Passed through unchanged to the underlying "strrpos()" / "mb_strrpos()" call:
     *                         positive values start the search that far into the string, negative values
     *                         stop the search at that point before the end of the string.
     *                         </p>
     *
     * @return false|int
     *                   The position of the last occurrence of the needle, or <strong>false</strong> if the
     *                   needle was not found or if the haystack or the needle is an empty string
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // nothing to search in or nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // common case: no "mbstring.func_overload" detected, use the native function directly
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strrpos($haystack, $needle, $offset);
        }

        // overload is active, so "mb_" functions exist: use an 8-bit encoding to search byte-wise
        return \mb_strrpos($haystack, $needle, $offset, 'CP850');
    }
9633
9634
    /**
     * Measure the leading run of a string that is built only from characters of the given mask.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $mask     <p>The characters that are allowed in the leading run.</p>
     * @param int    $offset   [optional] <p>Start position of the part of the string that is inspected.</p>
     * @param int    $length   [optional] <p>Length of the part of the string that is inspected.</p>
     * @param string $encoding [optional] <p>Set the charset.</p>
     *
     * @return false|int
     *                   The length of the matching leading run, 0 if nothing matches or if the
     *                   (sliced) string or the mask is empty
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // only slice the input if the caller asked for a sub-part of it
        $needsSlice = $offset || $length !== null;
        if ($needsSlice) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $str = (string) \mb_substr($str, $offset, $length);
            } else {
                $str = (string) \mb_substr($str, $offset);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        // anchor a character class built from the mask at the start of the string
        $found = [];
        $pattern = '/^' . self::rxClass($mask) . '+/u';
        if (!\preg_match($pattern, $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
9677
9678
    /**
     * Get the part of the haystack that starts at the first occurrence of the needle
     * (or, on request, the part in front of it).
     *
     * The lookup walks through these strategies in order and uses the first one that applies:
     * mbstring, native "strstr()" for CP850 / ASCII, intl ("grapheme_strstr()"),
     * native "strstr()" for ASCII-only input, and finally a regex based fallback.
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, the part of the haystack before the first occurrence
     *                              of the needle is returned (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|string
     *                      A sub-string,<br>or <strong>false</strong> if needle is not found
     *                      or if the haystack or the needle is an empty string
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        $cleanUtf8 = false
    ) {
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($cleanUtf8 === true) {
            // invalid characters in front of the needle would lead to wrong positions,
            // so both strings are cleaned before the search starts
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $isUtf8 = $encoding === 'UTF-8';

        //
        // 1. mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $isUtf8
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // 2. binary || ascii encoding: the native function is good enough
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strstr($haystack, $needle, $before_needle);
        }

        // every fallback below is only able to work with UTF-8
        if (!$isUtf8 && self::$SUPPORT['mbstring'] === false) {
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // 3. intl ("grapheme_strstr()" can't handle other encodings)
        //

        if ($isUtf8 && self::$SUPPORT['intl'] === true) {
            $viaIntl = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($viaIntl !== false) {
                return $viaIntl;
            }
        }

        //
        // 4. ascii only input
        //

        if (self::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // 5. vanilla php: capture everything in front of the first needle
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);

        $prefix = $match[1] ?? null;
        if ($prefix === null) {
            return false;
        }

        return $before_needle
            ? $prefix
            : self::substr($haystack, (int) self::strlen($prefix));
    }
9786
9787
    /**
     * Byte-wise lookup of the first occurrence of a string inside another string.
     *
     * @param string $haystack      <p>The string that is searched.</p>
     * @param string $needle        <p>The string to look for in the haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Selects the returned portion of the haystack:<br>
     *                              <b>true</b>, everything from the beginning up to the first
     *                              occurrence of the needle<br>
     *                              <b>false</b>, everything from the first occurrence of the needle
     *                              to the end
     *                              </p>
     *
     * @return false|string
     *                      The selected portion of the haystack, or <strong>false</strong> if the needle
     *                      was not found or if the haystack or the needle is an empty string
     */
    public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
    {
        // nothing to search in or nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // common case: no "mbstring.func_overload" detected, use the native function directly
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \strstr($haystack, $needle, $before_needle);
        }

        // overload is active, so "mb_" functions exist: use an 8-bit encoding to search byte-wise
        return \mb_strstr($haystack, $needle, $before_needle, 'CP850');
    }
9822
9823
    /**
     * Fold the case of a string so that it can be used for case-less matching.
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str       <p>The input string.</p>
     * @param bool        $full      [optional] <p>
     *                               <b>true</b>, replace full case folding chars (default)<br>
     *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                               </p>
     * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding  [optional] <p>Set the charset.</p>
     * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower     [optional] <p>
     *                               <b>true</b>, fold to lowercase (default)<br>
     *                               any other value, fold to uppercase (which is better for some languages)
     *                               </p>
     *
     * @return string
     *                <p>The case-folded string, or an empty string for empty input.</p>
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $cleanUtf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // handle the special case folding chars first, the plain case conversion follows below
        $str = self::fixStrCaseHelper($str, $lower, $full);

        $toLower = $lower === true;

        // fast path: plain UTF-8 without language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return $toLower ? \mb_strtolower($str) : \mb_strtoupper($str);
        }

        // other encodings or language specific rules are delegated to the case converters of this class
        return $toLower
            ? self::strtolower($str, $encoding, $cleanUtf8, $lang)
            : self::strtoupper($str, $encoding, $cleanUtf8, $lang);
    }
9875
9876
    /**
     * Convert a string to lowercase.
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                   <p>The string being lowercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // the parameter is not type-hinted, so force a string first
        $str = (string) $str;
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // fast path: plain UTF-8 without language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // no language specific rules requested: "mb_strtolower()" (or its polyfill) does the job
        if ($lang === null) {
            return \mb_strtolower($str, $encoding);
        }

        // language specific rules need the transliterator from intl,
        // without it we warn and use the generic conversion
        if (self::$SUPPORT['intl'] !== true) {
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            return \mb_strtolower($str, $encoding);
        }

        $transliteratorId = $lang . '-Lower';
        if (!\in_array($transliteratorId, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, \E_USER_WARNING);

            // unknown language: use the language independent transliterator
            $transliteratorId = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        return (string) \transliterator_transliterate($transliteratorId, $str);
    }
9941
9942
    /**
     * Make a string uppercase.
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                   <p>The string being uppercased.</p>
     * @param string      $encoding              [optional] <p>Set the charset.</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        // init (the parameter is not type-hinted, so force a string first)
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($tryToKeepStringLength === true) {
            $str = self::fixStrCaseHelper($str, false);
        }

        // fast path: plain UTF-8 without language specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                $langCode = $lang . '-Upper';
                if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
                    // intl is available here, only the requested language is unknown
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, \E_USER_WARNING);

                    // use the language independent transliterator instead
                    $langCode = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($langCode, $str);
            }

            // language specific rules need intl: warn and continue with the generic conversion
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang"-parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
10007
10008
    /**
     * Translate characters or replace sub-strings.
     *
     * Behaviour depending on the arguments:
     * - $to is not an empty string: $from and $to are both split via "UTF8::str_split()", the longer
     *   list is cut down to the size of the shorter one and the resulting from => to map is handed
     *   to the native "strtr()"
     * - $to is an empty string and $from is a string: every occurrence of $from is removed
     * - $to is an empty string and $from is an array: $from is handed to the native "strtr()" as-is
     *
     * @see  http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from.</p>
     * @param string|string[] $to   [optional] <p>The string being translated to.</p>
     *
     * @throws \InvalidArgumentException if the from / to lists can't be combined into a map
     *
     * @return string
     *                This function returns a copy of str, translating all occurrences of each character in from to the
     *                corresponding character in to
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // translating something into itself is a no-op
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            $fromList = self::str_split($from);
            $toList = self::str_split($to);
            $countFrom = \count($fromList);
            $countTo = \count($toList);

            // both lists must have the same size: cut off the surplus of the longer one
            if ($countFrom > $countTo) {
                $fromList = \array_slice($fromList, 0, $countTo);
            } elseif ($countFrom < $countTo) {
                $toList = \array_slice($toList, 0, $countFrom);
            }

            // keep the lists in their own variables, so that the error message
            // below still shows them if the combination fails
            $map = \array_combine($fromList, $toList);
            if ($map === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($fromList, true) . ' | to: ' . \print_r($toList, true) . ')');
            }

            $from = $map;
        }

        // only reachable with an empty $to: remove every occurrence of $from
        if (\is_string($from)) {
            return \str_replace($from, '', $str);
        }

        return \strtr($str, $from);
    }
10055
10056
    /**
     * Calculate the display width of a string.
     *
     * With mbstring the result comes from "mb_strwidth()". Without it, every character that
     * matches the wide-character ranges of the fallback regex counts as 2 and every other
     * character counts as 1.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return int
     *             <p>The width of the string, 0 for an empty string.</p>
     */
    public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
    {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // iconv and mbstring are not tolerant to invalid encoding
            $str = self::clean($str);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        //
        // fallback: vanilla php (the regex below works on UTF-8 only)
        //

        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip all wide characters and let "preg_replace()" count how many were removed
        $wideCount = 0;
        $narrowOnly = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wideCount);

        // wide characters count twice, everything that is left counts once
        return ($wideCount * 2) + (int) self::strlen($narrowOnly, 'UTF-8');
    }
10106
10107
    /**
10108
     * Get part of a string.
10109
     *
10110
     * @see http://php.net/manual/en/function.mb-substr.php
10111
     *
10112
     * @param string $str       <p>The string being checked.</p>
10113
     * @param int    $offset    <p>The first position used in str.</p>
10114
     * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
10115
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
10116
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10117
     *
10118
     * @return false|string
10119
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
10120
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
10121
     *                      characters long, <b>FALSE</b> will be returned.
10122
     */
10123 172
    public static function substr(
10124
        string $str,
10125
        int $offset = 0,
10126
        int $length = null,
10127
        string $encoding = 'UTF-8',
10128
        bool $cleanUtf8 = false
10129
    ) {
10130
        // empty string
10131 172
        if ($str === '' || $length === 0) {
10132 8
            return '';
10133
        }
10134
10135 168
        if ($cleanUtf8 === true) {
10136
            // iconv and mbstring are not tolerant to invalid encoding
10137
            // further, their behaviour is inconsistent with that of PHP's substr
10138 2
            $str = self::clean($str);
10139
        }
10140
10141
        // whole string
10142 168
        if (!$offset && $length === null) {
10143 7
            return $str;
10144
        }
10145
10146 163
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
10147 19
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
10148
        }
10149
10150
        //
10151
        // fallback via mbstring
10152
        //
10153
10154 163
        if (self::$SUPPORT['mbstring'] === true) {
10155 161
            if ($encoding === 'UTF-8') {
10156 161
                if ($length === null) {
10157 64
                    return \mb_substr($str, $offset);
10158
                }
10159
10160 102
                return \mb_substr($str, $offset, $length);
10161
            }
10162
10163
            return self::substr($str, $offset, $length, $encoding);
10164
        }
10165
10166
        //
10167
        // fallback for binary || ascii only
10168
        //
10169
10170
        if (
10171 4
            $encoding === 'CP850'
10172
            ||
10173 4
            $encoding === 'ASCII'
10174
        ) {
10175
            if ($length === null) {
10176
                return \substr($str, $offset);
10177
            }
10178
10179
            return \substr($str, $offset, $length);
10180
        }
10181
10182
        // otherwise we need the string-length
10183 4
        $str_length = 0;
10184 4
        if ($offset || $length === null) {
10185 4
            $str_length = self::strlen($str, $encoding);
10186
        }
10187
10188
        // e.g.: invalid chars + mbstring not installed
10189 4
        if ($str_length === false) {
10190
            return false;
10191
        }
10192
10193
        // empty string
10194 4
        if ($offset === $str_length && !$length) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $length of type integer|null is loosely compared to false; this is ambiguous if the integer can be 0. You might want to explicitly use === null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
10195
            return '';
10196
        }
10197
10198
        // impossible
10199 4
        if ($offset && $offset > $str_length) {
10200
            return '';
10201
        }
10202
10203 4
        if ($length === null) {
10204 4
            $length = (int) $str_length;
10205
        } else {
10206 2
            $length = (int) $length;
10207
        }
10208
10209
        if (
10210 4
            $encoding !== 'UTF-8'
10211
            &&
10212 4
            self::$SUPPORT['mbstring'] === false
10213
        ) {
10214 2
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
10215
        }
10216
10217
        //
10218
        // fallback via intl
10219
        //
10220
10221
        if (
10222 4
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
10223
            &&
10224 4
            $offset >= 0 // grapheme_substr() can't handle negative offset
10225
            &&
10226 4
            self::$SUPPORT['intl'] === true
10227
        ) {
10228
            $returnTmp = \grapheme_substr($str, $offset, $length);
10229
            if ($returnTmp !== false) {
10230
                return $returnTmp;
10231
            }
10232
        }
10233
10234
        //
10235
        // fallback via iconv
10236
        //
10237
10238
        if (
10239 4
            $length >= 0 // "iconv_substr()" can't handle negative length
10240
            &&
10241 4
            self::$SUPPORT['iconv'] === true
10242
        ) {
10243
            $returnTmp = \iconv_substr($str, $offset, $length);
10244
            if ($returnTmp !== false) {
10245
                return $returnTmp;
10246
            }
10247
        }
10248
10249
        //
10250
        // fallback for ascii only
10251
        //
10252
10253 4
        if (self::is_ascii($str)) {
10254
            return \substr($str, $offset, $length);
10255
        }
10256
10257
        //
10258
        // fallback via vanilla php
10259
        //
10260
10261
        // split to array, and remove invalid characters
10262 4
        $array = self::str_split($str);
10263
10264
        // extract relevant part, and join to make sting again
10265 4
        return \implode('', \array_slice($array, $offset, $length));
10266
    }
10267
10268
    /**
     * Binary safe comparison of two strings from an offset, up to length characters.
     *
     * If an offset or a length is given, $str1 is first reduced to the requested slice and
     * $str2 is truncated to the character length of that slice; the two results are then
     * compared. Without offset and length both strings are compared as they are.
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
     *                                     of the length of the str compared to the length of main_str less the
     *                                     offset.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);
                // measure the slice in the same encoding that is used to cut $str2,
                // otherwise the two strings can end up with different character counts
                $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1, $encoding), $encoding);
            }
        }

        if ($case_insensitivity === true) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
10321
10322
    /**
     * Count the number of substring occurrences.
     *
     * @see  http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack  <p>The string to search in.</p>
     * @param string   $needle    <p>The substring to search for.</p>
     * @param int      $offset    [optional] <p>The offset where to start counting.</p>
     * @param int|null $length    [optional] <p>
     *                            The maximum length after the specified offset to search for the
     *                            substring. A negative value stops that many characters before the
     *                            end of the haystack; 0 always yields a count of 0.
     *                            </p>
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return false|int
     *                   The number of occurrences, or <strong>false</strong> if $haystack or $needle is
     *                   empty or the length of the haystack could not be determined
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // "$length === 0" was already handled above, so every non-null length
        // (positive or negative) has to narrow the haystack, with or without an offset
        if ($offset !== 0 || $length !== null) {
            if ($length === null) {
                $lengthTmp = self::strlen($haystack, $encoding);
                if ($lengthTmp === false) {
                    return false;
                }
                $length = (int) $lengthTmp;
            }

            if ($encoding === 'UTF-8') {
                $haystack = (string) \mb_substr($haystack, $offset, $length);
            } else {
                $haystack = (string) \mb_substr($haystack, $offset, $length, $encoding);
            }
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_substr_count($haystack, $needle);
            }

            return \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback without mbstring: count the (quoted) needle via a unicode-aware regex
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
10403
10404
    /**
     * Count the number of substring occurrences, working on bytes instead of characters.
     *
     * If "mbstring.func_overload" is active, the haystack is sliced and counted via the
     * "mb_" functions with an explicit 8-bit charset ("CP850"), otherwise the native
     * "substr_count()" is used directly.
     *
     * @param string   $haystack <p>
     *                           The string being checked.
     *                           </p>
     * @param string   $needle   <p>
     *                           The string being found.
     *                           </p>
     * @param int      $offset   [optional] <p>
     *                           The offset where to start counting
     *                           </p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring. It outputs a warning if the offset plus the length is
     *                           greater than the haystack length.
     *                           </p>
     *
     * @return false|int
     *                   The number of times the needle substring occurs in the haystack string;
     *                   0 if $haystack or $needle is empty
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            // INFO: with func_overload the plain "substr()" / "strlen()" are not byte-based
            //       anymore, so the 8-bit charset is passed explicitly to the "mb_" functions

            if ($offset || $length !== null) {
                if ($length === null) {
                    $length = (int) \mb_strlen($haystack, 'CP850'); // 8-BIT
                }

                if (
                    (
                        $length !== 0
                        &&
                        $offset !== 0
                    )
                    &&
                    ($length + $offset) <= 0
                    &&
                    Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
                ) {
                    return false;
                }

                $haystack = (string) \mb_substr($haystack, $offset, $length, 'CP850'); // 8-BIT
            }

            return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
        }

        if ($length === null) {
            return \substr_count($haystack, $needle, $offset);
        }

        return \substr_count($haystack, $needle, $offset, $length);
    }
10481
10482
    /**
     * Counts how often $substring occurs in $str.
     *
     * The search is case-sensitive by default; pass false as $caseSensitive to compare
     * case-insensitively. An empty $str or $substring always results in 0.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $substring     <p>The substring to search for.</p>
     * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return int
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $caseSensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($str === '' || $substring === '') {
            return 0;
        }

        // remember the decision before the charset name gets normalized
        $isPlainUtf8 = ($encoding === 'UTF-8');

        if ($isPlainUtf8) {
            if (!$caseSensitive) {
                // case-insensitive: compare the upper-cased variants
                $upperStr = \mb_strtoupper($str);
                $upperSubstring = \mb_strtoupper($substring);

                return (int) \mb_substr_count($upperStr, $upperSubstring);
            }

            return (int) \mb_substr_count($str, $substring);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (!$caseSensitive) {
            // case-insensitive: compare the case-folded variants
            $foldedStr = self::strtocasefold($str, true, false, $encoding, null, false);
            $foldedSubstring = self::strtocasefold($substring, true, false, $encoding, null, false);

            return (int) \mb_substr_count($foldedStr, $foldedSubstring, $encoding);
        }

        return (int) \mb_substr_count($str, $substring, $encoding);
    }
10528
10529
    /**
     * Strips the prefix $needle from the beginning of $haystack, ignoring case.
     *
     * The haystack is returned untouched if it is empty, if the needle is empty or
     * if it does not start with the needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string return the sub-string
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // nothing to strip from an empty string and nothing to strip with an empty prefix
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_istarts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $prefixLength = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefixLength);
    }
10553
10554
    /**
     * Returns a part of a string, where offset and length are counted in bytes.
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first position used in str.</p>
     * @param int|null $length [optional] <p>The maximum length of the returned string.</p>
     *
     * @return false|string
     *                      The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
     *                      characters long, <b>FALSE</b> will be returned.
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // nothing to extract
        $resultIsEmpty = ($str === '' || $length === 0);
        if ($resultIsEmpty) {
            return '';
        }

        // no offset and no limit: the input is already the answer
        if ($length === null && $offset === 0) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            $maxBytes = $length === null ? 2147483647 : $length;

            return \substr($str, $offset, $maxBytes);
        }

        // "mb_" is available if overload is used, so use it with an 8-bit charset ...
        return \mb_substr($str, $offset, $length, 'CP850');
    }
10585
10586
    /**
     * Strips the suffix $needle from the end of $haystack, ignoring case.
     *
     * The haystack is returned untouched if it is empty, if the needle is empty or
     * if it does not end with the needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string return the sub-string
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // nothing to strip from an empty string and nothing to strip with an empty suffix
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_iends_with($haystack, $needle) !== true) {
            return $haystack;
        }

        // number of characters that remain once the suffix is gone
        $keepLength = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $keepLength);
    }
10610
10611
    /**
     * Strips the prefix $needle from the beginning of $haystack (case-sensitive).
     *
     * The haystack is returned untouched if it is empty, if the needle is empty or
     * if it does not start with the needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @return string return the sub-string
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // nothing to strip from an empty string and nothing to strip with an empty prefix
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (self::str_starts_with($haystack, $needle) !== true) {
            return $haystack;
        }

        $prefixLength = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefixLength);
    }
10635
10636
    /**
     * Replace text within a portion of a string.
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * If $str is an array, the replacement is done for every element via a recursive call
     * (with the given $encoding) and an array is returned. In that array mode a $length of
     * null is turned into a length of 0 for every element.
     *
     * @param string|string[] $str         <p>The input string or an array of stings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of stings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given, then it will default to strlen(
     *                                     string ); i.e. end the replacing at the end of string. Of course, if
     *                                     length is zero then this function will have the effect of inserting
     *                                     replacement into string at the given start offset.</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \InvalidArgumentException if $offset or $length is an array, but $str is not
     *
     * @return string|string[] The result string is returned. If string is an array then array is returned.
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str) === true) {
            $num = \count($str);

            // the replacement
            if (\is_array($replacement) === true) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset
            if (\is_array($offset) === true) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$valueTmp) {
                    // non-integer offsets fall back to 0
                    $valueTmp = (int) $valueTmp === $valueTmp ? $valueTmp : 0;
                }
                unset($valueTmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length) === true) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$valueTmpV2) {
                    // non-integer lengths fall back to the number of input strings
                    $valueTmpV2 = (int) $valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
                }
                unset($valueTmpV2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call, via a closure so that the requested encoding is not lost
            return \array_map(
                static function ($strTmp, $replacementTmp, $offsetTmp, $lengthTmp) use ($encoding) {
                    return self::substr_replace($strTmp, $replacementTmp, $offsetTmp, $lengthTmp, $encoding);
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        // a single input string can only use a single replacement: take the first one
        if (\is_array($replacement) === true) {
            if (\count($replacement) > 0) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length) === true) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset) === true) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into the range 0 ... string-length
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // negative length: stop that many characters before the end of the string
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // part before the offset + replacement + part after the replaced range
            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (self::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into characters and splice the replacement in
        \preg_match_all('/./us', $str, $smatches);
        \preg_match_all('/./us', $replacement, $rmatches);

        if ($length === null) {
            $lengthTmp = self::strlen($str, $encoding);
            if ($lengthTmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $lengthTmp;
        }

        \array_splice($smatches[0], $offset, $length, $rmatches[0]);

        return \implode('', $smatches[0]);
    }
10786
10787
    /**
     * Strips the suffix $needle from the end of $haystack (case-sensitive).
     *
     * The haystack is returned untouched if it is empty, if the needle is empty or
     * if its trailing bytes are not identical to the needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string return the sub-string
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to strip from an empty string and nothing to strip with an empty suffix
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // byte-wise check whether the haystack ends with the needle
        $endsWithNeedle = (\substr($haystack, -\strlen($needle)) === $needle);
        if (!$endsWithNeedle) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            $keepLength = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keepLength);
        }

        $keepLength = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

        return (string) self::substr($haystack, 0, $keepLength, $encoding);
    }
10828
10829
    /**
     * Returns a case swapped version of the string.
     *
     * For UTF-8 input that contains non-ASCII characters the swap is done character by
     * character, so that case mappings which change the byte length of a character
     * (e.g. "Ⱥ" <-> "ⱥ") do not corrupt the result. Pure ASCII input, input that can not be
     * split as UTF-8 and non UTF-8 encodings use the byte-wise XOR of the lower-cased,
     * the upper-cased and the original string.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string each character's case swapped
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8') {
            return (string) (self::strtolower($str, $encoding) ^ self::strtoupper($str, $encoding) ^ $str);
        }

        // ASCII only: every character is one byte in all three variants, so the XOR is safe
        if (\preg_match('/[^\x00-\x7F]/', $str) !== 1) {
            return (string) (\mb_strtolower($str) ^ \mb_strtoupper($str) ^ $str);
        }

        $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);
        if ($chars === false) {
            // e.g.: invalid UTF-8, the string can not be split into characters
            return (string) (\mb_strtolower($str) ^ \mb_strtoupper($str) ^ $str);
        }

        $swapped = '';
        foreach ($chars as $char) {
            $lower = \mb_strtolower($char);

            // already lower-case (or caseless) -> use the upper-case variant,
            // otherwise the lower-case variant is the swapped one
            $swapped .= $lower === $char ? \mb_strtoupper($char) : $lower;
        }

        return $swapped;
    }
10856
10857
    /**
     * Checks whether symfony-polyfills are used.
     *
     * A polyfill is assumed if the "mbstring" or the "iconv" extension is not loaded,
     * but one of its functions ("mb_strlen" / "iconv") exists anyway.
     *
     * @return bool
     *              <strong>true</strong> if in use, <strong>false</strong> otherwise
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstringIsPolyfilled = \extension_loaded('mbstring') === false && \function_exists('mb_strlen');
        $iconvIsPolyfilled = \extension_loaded('iconv') === false && \function_exists('iconv');

        return $mbstringIsPolyfilled || $iconvIsPolyfilled;
    }
10880
10881
    /**
     * Replaces every tab character in the string by $tabLength spaces.
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $tabLength [optional] <p>Number of spaces that replace one tab. Default: 4</p>
     *
     * @return string
     */
    public static function tabs_to_spaces(string $str, int $tabLength = 4): string
    {
        $indent = \str_repeat(' ', $tabLength);

        return \str_replace("\t", $indent, $str);
    }
10899
10900
    /**
     * Title-cases the string: the first character of each word becomes uppercase,
     * all other characters become lowercase.
     *
     * Without a language and without "$tryToKeepStringLength" the conversion is done by
     * "mb_convert_case()", otherwise it is delegated to "UTF8::str_titleize()".
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string string with all characters of $str being title-cased
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($cleanUtf8 === true) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // language specific rules or length preservation need the slower helper
        $needsSpecialHandling = ($lang !== null || $tryToKeepStringLength !== false);
        if ($needsSpecialHandling) {
            return self::str_titleize($str, null, $encoding, false, $lang, $tryToKeepStringLength, false);
        }

        if ($encoding !== 'UTF-8') {
            $normalizedEncoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_convert_case($str, \MB_CASE_TITLE, $normalizedEncoding);
        }

        return \mb_convert_case($str, \MB_CASE_TITLE);
    }
10937
10938
    /**
     * Deprecated camelCase alias, forwards all arguments unchanged to "UTF8::to_ascii()".
     *
     * @see        UTF8::to_ascii()
     *
     * @param string $str
     * @param string $subst_chr
     * @param bool   $strict
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::to_ascii()"</p>
     */
    public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
    {
        $asciiString = self::to_ascii($str, $subst_chr, $strict);

        return $asciiString;
    }
10955
10956
    /**
     * Deprecated camelCase alias, forwards the input unchanged to "UTF8::to_iso8859()".
     *
     * @see        UTF8::to_iso8859()
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *
     * @deprecated <p>use "UTF8::to_iso8859()"</p>
     */
    public static function toIso8859($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
10971
10972
    /**
     * Deprecated camelCase alias, forwards the input unchanged to "UTF8::to_latin1()".
     *
     * @see        UTF8::to_latin1()
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     *
     * @deprecated <p>use "UTF8::to_latin1()"</p>
     */
    public static function toLatin1($str)
    {
        $converted = self::to_latin1($str);

        return $converted;
    }
10987
10988
    /**
     * Deprecated camelCase alias that forwards to "UTF8::to_utf8()".
     *
     * Only the input is forwarded; the optional second argument of "UTF8::to_utf8()" keeps its default.
     *
     * @see        UTF8::to_utf8()
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     *
     * @deprecated <p>use "UTF8::to_utf8()"</p>
     */
    public static function toUTF8($str)
    {
        $converted = self::to_utf8($str);

        return $converted;
    }
11003
11004
    /**
     * Convert a string into ASCII.
     *
     * Input that is already pure ASCII is returned unchanged. Otherwise the string is passed through
     * "UTF8::clean()" and every remaining multi-byte character is decoded to its code point and looked up in a
     * replacement table (one table per 256 code points, loaded on first use and cached for the request).
     * Characters without a table entry are replaced by $unknown.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @return string
     */
    public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
    {
        // per-request cache of the replacement tables, keyed by bank (code point >> 8)
        static $UTF8_TO_ASCII;

        if ($str === '') {
            return '';
        }

        // check if we only have ASCII, first (better performance)
        if (self::is_ascii($str) === true) {
            return $str;
        }

        $str = self::clean(
            $str,
            true,
            true,
            true,
            false,
            true,
            true
        );

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
            return $str;
        }

        if (
            $strict === true
            &&
            self::$SUPPORT['intl'] === true
        ) {
            // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
            /** @noinspection PhpComposerExtensionStubsInspection */
            /** @noinspection UnnecessaryCastingInspection */
            $str = (string) \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

            // check again, if we only have ASCII, now ...
            if (self::is_ascii($str) === true) {
                return $str;
            }
        }

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        // split the string into single (multi-byte) characters
        \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
        $chars = $ar[0];
        foreach ($chars as &$c) {
            $ordC0 = self::$ORD[$c[0]];

            // ASCII - next please
            if ($ordC0 >= 0 && $ordC0 <= 127) {
                continue;
            }

            // Reset per character: a lead byte that matches none of the branches below must end up as
            // "$unknown" instead of silently re-using the code point of the previous character.
            $ord = null;

            $ordC1 = self::$ORD[$c[1]];

            // 2-byte sequence
            if ($ordC0 >= 192 && $ordC0 <= 223) {
                $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
            }

            if ($ordC0 >= 224) {
                $ordC2 = self::$ORD[$c[2]];

                // 3-byte sequence
                if ($ordC0 <= 239) {
                    $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
                }

                if ($ordC0 >= 240) {
                    $ordC3 = self::$ORD[$c[3]];

                    // 4-byte sequence
                    if ($ordC0 <= 247) {
                        $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
                    }

                    if ($ordC0 >= 248) {
                        $ordC4 = self::$ORD[$c[4]];

                        // 5-byte sequence
                        if ($ordC0 <= 251) {
                            $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
                        }

                        if ($ordC0 >= 252) {
                            $ordC5 = self::$ORD[$c[5]];

                            // 6-byte sequence
                            if ($ordC0 <= 253) {
                                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
                            }
                        }
                    }
                }
            }

            // 0xFE / 0xFF are handled explicitly, and so is every character that could not be decoded
            if ($ordC0 === 254 || $ordC0 === 255 || $ord === null) {
                $c = $unknown;

                continue;
            }

            // the replacement tables are split into banks of 256 code points ("x00", "x01", ...)
            $bank = $ord >> 8;
            if (!isset($UTF8_TO_ASCII[$bank])) {
                $UTF8_TO_ASCII[$bank] = self::getDataIfExists(\sprintf('x%02x', $bank));
                if ($UTF8_TO_ASCII[$bank] === false) {
                    $UTF8_TO_ASCII[$bank] = [];
                }
            }

            $newchar = $ord & 255;

            // use the table entry, or fall back to "$unknown" if the bank has none for this character
            $c = $UTF8_TO_ASCII[$bank][$newchar] ?? $unknown;
        }
        // break the reference so later writes to "$c" cannot modify the last element of "$chars"
        unset($c);

        return \implode('', $chars);
    }
11164
11165
    /**
     * Interpret a value as a boolean.
     *
     * The value is cast to string first. The keywords "true", "1", "on", "yes" give true and
     * "false", "0", "off", "no" give false (exact match first, then lower-cased). Any other numeric
     * string is true only if it is greater than zero. Everything else is true unless it is empty
     * after trimming.
     *
     * @param mixed $str
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        $value = (string) $str;

        if ($value === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $trueWords = ['true', '1', 'on', 'yes'];
        $falseWords = ['false', '0', 'off', 'no'];

        // try the value as given first, then its lower-cased form
        foreach ([$value, \strtolower($value)] as $candidate) {
            if (\in_array($candidate, $trueWords, true)) {
                return true;
            }

            if (\in_array($candidate, $falseWords, true)) {
                return false;
            }
        }

        if (\is_numeric($value)) {
            return ((float) $value + 0) > 0;
        }

        return (bool) \trim($value);
    }
11206
11207
    /**
     * Convert given string to safe filename (and keep string case).
     *
     * Every character except ASCII letters, digits, ".", "-", whitespace and the fallback character is removed.
     * Runs of whitespace become one $fallback_char, repeated fallback characters are collapsed, and
     * leading / trailing fallback characters are stripped.
     *
     * @param string $string            <p>The input string.</p>
     * @param bool   $use_transliterate <p>If true, the string is passed through "UTF8::str_transliterate()"
     *                                  first. By default no transliteration is done and unsafe characters are
     *                                  simply removed.</p>
     * @param string $fallback_char     <p>Replacement for whitespace. An empty string removes whitespace.</p>
     *
     * @return string
     */
    public static function to_filename(string $string, bool $use_transliterate = false, string $fallback_char = '-'): string
    {
        if ($use_transliterate === true) {
            $string = self::str_transliterate($string, $fallback_char);
        }

        $fallback_char_escaped = \preg_quote($fallback_char, '/');

        $patterns = [
            '/[^' . $fallback_char_escaped . '\.\-a-zA-Z0-9\s]/', // 1) remove un-needed chars
            '/[\s]+/',                                            // 2) convert spaces to $fallback_char
        ];
        $replacements = [
            '',
            $fallback_char,
        ];

        // An empty fallback char would build the invalid pattern "/[]+/", which makes preg_replace()
        // return null and turned every input into an empty string - there is nothing to collapse
        // or trim in that case anyway.
        if ($fallback_char === '') {
            return (string) \preg_replace($patterns, $replacements, $string);
        }

        $patterns[] = '/[' . $fallback_char_escaped . ']+/'; // 3) remove double $fallback_char's
        $replacements[] = $fallback_char;

        $string = (string) \preg_replace($patterns, $replacements, $string);

        // trim "$fallback_char" from beginning and end of the string
        return \trim($string, $fallback_char);
    }
11242
11243
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * Arrays are converted element by element (recursively, keys are kept). Any other value is cast to
     * string; the empty string is returned as-is and everything else is handed to "UTF8::utf8_decode()".
     *
     * @param string|string[] $str
     *
     * @return string|string[]
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        $input = (string) $str;

        return $input === '' ? '' : self::utf8_decode($input);
    }
11267
11268
    /**
     * Alias that forwards to "UTF8::to_iso8859()".
     *
     * @see UTF8::to_iso8859()
     *
     * @param string|string[] $str <p>A string or an array of strings.</p>
     *
     * @return string|string[]
     */
    public static function to_latin1($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
11281
11282
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decodes unicode escape sequences ("\uXXXX", including surrogate pairs) and, on request,
     * html-entities.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * @param string|string[] $str                    <p>Any string or array.</p>
     * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @return string|string[] the UTF-8 encoded string
     */
    public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
    {
        if (\is_array($str) === true) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
            }

            return $str;
        }

        $str = (string) $str;
        if ($str === '') {
            return $str;
        }

        $length = \strlen($str);
        $buf = '';

        for ($i = 0; $i < $length; ++$i) {
            $lead = $str[$i];

            if ($lead < "\xC0") {
                // a stray continuation byte (10xxxxxx) needs conversion, everything else is copied as-is
                $buf .= ($lead & "\xC0") === "\x80" ? self::to_utf8_convert_helper($lead) : $lead;

                continue;
            }

            // number of continuation bytes the lead byte announces (0 === cannot start a UTF-8 sequence)
            if ($lead <= "\xDF") {
                $tail = 1; // looks like 2 bytes UTF8
            } elseif ($lead <= "\xEF") {
                $tail = 2; // looks like 3 bytes UTF8
            } elseif ($lead <= "\xF7") {
                $tail = 3; // looks like 4 bytes UTF8
            } else {
                $tail = 0; // doesn't look like UTF8, but should be converted
            }

            // every announced continuation byte must exist and be in the range 0x80 - 0xBF
            $looksLikeUtf8 = $tail > 0;
            for ($offset = 1; $looksLikeUtf8 && $offset <= $tail; ++$offset) {
                $next = $i + $offset >= $length ? "\x00" : $str[$i + $offset];
                $looksLikeUtf8 = $next >= "\x80" && $next <= "\xBF";
            }

            if ($looksLikeUtf8) {
                // yeah, almost sure it's UTF8 already - copy the whole sequence and skip its tail
                $buf .= \substr($str, $i, $tail + 1);
                $i += $tail;
            } else {
                // not valid UTF8 - convert the lead byte on its own
                $buf .= self::to_utf8_convert_helper($lead);
            }
        }

        // decode unicode escape sequences + unicode surrogate pairs
        $buf = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @return string
             */
            static function (array $matches): string {
                // group 3 is only set for a single "\uXXXX" escape, otherwise groups 1 + 2 hold a surrogate
                // pair that is combined as described in http://unicode.org/faq/utf_bom.html#utf16-4
                $cp = isset($matches[3])
                    ? (int) \hexdec($matches[3])
                    : 0x10000 + (((int) \hexdec($matches[1]) - 0xD800) << 10) + ((int) \hexdec($matches[2]) - 0xDC00);

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($cp < 0x80) {
                    return (string) self::chr($cp);
                }

                if ($cp < 0xA0) {
                    /** @noinspection UnnecessaryCastingInspection */
                    return (string) self::chr(0xC0 | $cp >> 6) . (string) self::chr(0x80 | $cp & 0x3F);
                }

                return self::decimal_to_chr($cp);
            },
            $buf
        );

        // preg_replace_callback() signals an error with null
        if ($buf === null) {
            return '';
        }

        // decode html-entities
        if ($decodeHtmlEntityToUtf8 === true) {
            $buf = self::html_entity_decode($buf);
        }

        return $buf;
    }
11415
11416
    /**
     * Strip whitespace or other characters from beginning or end of a UTF-8 string.
     *
     * INFO: This is slower than "trim()"
     *
     * We could only use the original function if the string / chars were limited to 7-bit characters,
     * but the check for ASCII (7-bit) costs more time than we can save here.
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars [optional] <p>Optional characters to be stripped; null or an empty string
     *                           strips whitespace.</p>
     *
     * @return string the trimmed string
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // Compare explicitly: a plain truthiness check would treat the char list "0" as "not given"
        // and strip whitespace instead of zeros.
        if ($chars !== null && $chars !== '') {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = "^[\s]+|[\s]+\$";
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '', '', '/');
    }
11449
11450
    /**
     * Makes string's first char uppercase.
     *
     * The native "mb_strtoupper()" is used as long as no language is given and the string length does not
     * have to be kept; otherwise the first character is upper-cased via "UTF8::strtoupper()".
     *
     * @param string      $str                   <p>The input string.</p>
     * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
     *
     * @return string the resulting string
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false,
        string $lang = null,
        bool $tryToKeepStringLength = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($cleanUtf8 === true) {
            // invalid characters would make the multi-byte position functions return wrong offsets
            $str = self::clean($str);
        }

        $nativeUppercase = $lang === null && $tryToKeepStringLength === false;

        if ($encoding === 'UTF-8') {
            $rest = (string) \mb_substr($str, 1);
            $first = (string) \mb_substr($str, 0, 1);

            if ($nativeUppercase === true) {
                return \mb_strtoupper($first) . $rest;
            }

            return self::strtoupper($first, $encoding, false, $lang, $tryToKeepStringLength) . $rest;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $rest = (string) self::substr($str, 1, null, $encoding);

        if ($nativeUppercase === true) {
            $first = (string) \mb_substr($str, 0, 1, $encoding);

            return \mb_strtoupper($first, $encoding) . $rest;
        }

        $first = (string) self::substr($str, 0, 1, $encoding);

        return self::strtoupper($first, $encoding, false, $lang, $tryToKeepStringLength) . $rest;
    }
11519
11520
    /**
     * Alias that forwards to "UTF8::ucfirst()".
     *
     * Only the first three arguments are forwarded; the remaining "UTF8::ucfirst()" options keep their
     * defaults.
     *
     * @see UTF8::ucfirst()
     *
     * @param string $str       <p>The input string.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     */
    public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
    {
        $result = self::ucfirst($str, $encoding, $cleanUtf8);

        return $result;
    }
11535
11536
    /**
     * Uppercase for all words in the string.
     *
     * Pure-ASCII input without exceptions / charlist is handled by the native "ucwords()". Otherwise the
     * string is split via "UTF8::str_to_words()" and every word that is not listed in $exceptions is passed
     * through "UTF8::ucfirst()".
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
     * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $charlist = '',
        string $encoding = 'UTF-8',
        bool $cleanUtf8 = false
    ): string {
        // Strict comparison: the string "0" is falsy in PHP, but it is valid, non-empty input and must
        // not be turned into an empty string.
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($cleanUtf8 === true) {
            // invalid characters would make the multi-byte position functions return wrong offsets
            $str = self::clean($str);
        }

        $usePhpDefaultFunctions = !(bool) ($charlist . \implode('', $exceptions));

        if (
            $usePhpDefaultFunctions === true
            &&
            self::is_ascii($str) === true
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $charlist);
        $useExceptions = \count($exceptions) > 0;

        foreach ($words as &$word) {
            if (!$word) {
                continue;
            }

            if (
                $useExceptions === false
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $word = self::ucfirst($word, $encoding);
            }
        }
        // break the reference so later writes to "$word" cannot modify the last element of "$words"
        unset($word);

        return \implode('', $words);
    }
11597
11598
    /**
     * Multi decode html entity & fix urlencoded-win1252-chars.
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>true: decode as often as possible (until the string no longer
     *                             changes); false: decode exactly once.</p>
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // nothing that could be encoded -> only the simple UTF-8 fix is needed
        if (
            \strpos($str, '&') === false
            &&
            \strpos($str, '%') === false
            &&
            \strpos($str, '+') === false
            &&
            \strpos($str, '\u') === false
        ) {
            return self::fix_simple_utf8($str);
        }

        // rewrite non-standard "%uXXXX" escapes as html-entities, so that the entity decoder handles them
        $pattern = '/%u([0-9a-fA-F]{3,4})/';
        if (\preg_match($pattern, $str)) {
            $str = (string) \preg_replace($pattern, '&#x\\1;', \urldecode($str));
        }

        $flags = \ENT_QUOTES | \ENT_HTML5;

        // Always run at least one decoding pass: previously "$multi_decode === false" skipped the
        // decoding completely and returned the (still encoded) input.
        do {
            $str_compare = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::fix_simple_utf8(
                \urldecode(
                    self::html_entity_decode(
                        self::to_utf8($str),
                        $flags
                    )
                )
            );
        } while ($multi_decode === true && $str_compare !== $str);

        return $str;
    }
11662
11663
    /**
     * Return an array with "urlencoded"-win1252 -> UTF-8
     *
     * The keys are the upper-case percent-escapes "%20" - "%FF" (in ascending order), the values are the
     * decoded characters as UTF-8 strings. Bytes that map to an empty string here have no replacement
     * in this table.
     *
     * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
     *
     * @return string[]
     */
    public static function urldecode_fix_win1252_chars(): array
    {
        $map = [];

        // printable ASCII ("%20" - "%7E") decodes to the character with the same byte value
        for ($byte = 0x20; $byte <= 0x7E; ++$byte) {
            $map[\sprintf('%%%02X', $byte)] = \chr($byte);
        }

        // DEL, the Windows-1252 specific range (0x80 - 0x9F) and the Latin-1 range (0xA0 - 0xFF)
        return $map + [
            '%7F' => '',
            '%80' => '€', // 0x80 is the Euro sign in Windows-1252 (was wrongly mapped to a backtick)
            '%81' => '',
            '%82' => '‚',
            '%83' => 'ƒ',
            '%84' => '„',
            '%85' => '…',
            '%86' => '†',
            '%87' => '‡',
            '%88' => 'ˆ',
            '%89' => '‰',
            '%8A' => 'Š',
            '%8B' => '‹',
            '%8C' => 'Œ',
            '%8D' => '',
            '%8E' => 'Ž',
            '%8F' => '',
            '%90' => '',
            '%91' => '‘',
            '%92' => '’',
            '%93' => '“',
            '%94' => '”',
            '%95' => '•',
            '%96' => '–',
            '%97' => '—',
            '%98' => '˜',
            '%99' => '™',
            '%9A' => 'š',
            '%9B' => '›',
            '%9C' => 'œ',
            '%9D' => '',
            '%9E' => 'ž',
            '%9F' => 'Ÿ',
            '%A0' => '',
            '%A1' => '¡',
            '%A2' => '¢',
            '%A3' => '£',
            '%A4' => '¤',
            '%A5' => '¥',
            '%A6' => '¦',
            '%A7' => '§',
            '%A8' => '¨',
            '%A9' => '©',
            '%AA' => 'ª',
            '%AB' => '«',
            '%AC' => '¬',
            '%AD' => '',
            '%AE' => '®',
            '%AF' => '¯',
            '%B0' => '°',
            '%B1' => '±',
            '%B2' => '²',
            '%B3' => '³',
            '%B4' => '´',
            '%B5' => 'µ',
            '%B6' => '¶',
            '%B7' => '·',
            '%B8' => '¸',
            '%B9' => '¹',
            '%BA' => 'º',
            '%BB' => '»',
            '%BC' => '¼',
            '%BD' => '½',
            '%BE' => '¾',
            '%BF' => '¿',
            '%C0' => 'À',
            '%C1' => 'Á',
            '%C2' => 'Â',
            '%C3' => 'Ã',
            '%C4' => 'Ä',
            '%C5' => 'Å',
            '%C6' => 'Æ',
            '%C7' => 'Ç',
            '%C8' => 'È',
            '%C9' => 'É',
            '%CA' => 'Ê',
            '%CB' => 'Ë',
            '%CC' => 'Ì',
            '%CD' => 'Í',
            '%CE' => 'Î',
            '%CF' => 'Ï',
            '%D0' => 'Ð',
            '%D1' => 'Ñ',
            '%D2' => 'Ò',
            '%D3' => 'Ó',
            '%D4' => 'Ô',
            '%D5' => 'Õ',
            '%D6' => 'Ö',
            '%D7' => '×',
            '%D8' => 'Ø',
            '%D9' => 'Ù',
            '%DA' => 'Ú',
            '%DB' => 'Û',
            '%DC' => 'Ü',
            '%DD' => 'Ý',
            '%DE' => 'Þ',
            '%DF' => 'ß',
            '%E0' => 'à',
            '%E1' => 'á',
            '%E2' => 'â',
            '%E3' => 'ã',
            '%E4' => 'ä',
            '%E5' => 'å',
            '%E6' => 'æ',
            '%E7' => 'ç',
            '%E8' => 'è',
            '%E9' => 'é',
            '%EA' => 'ê',
            '%EB' => 'ë',
            '%EC' => 'ì',
            '%ED' => 'í',
            '%EE' => 'î',
            '%EF' => 'ï',
            '%F0' => 'ð',
            '%F1' => 'ñ',
            '%F2' => 'ò',
            '%F3' => 'ó',
            '%F4' => 'ô',
            '%F5' => 'õ',
            '%F6' => 'ö',
            '%F7' => '÷',
            '%F8' => 'ø',
            '%F9' => 'ù',
            '%FA' => 'ú',
            '%FB' => 'û',
            '%FC' => 'ü',
            '%FD' => 'ý',
            '%FE' => 'þ',
            '%FF' => 'ÿ',
        ];
    }
11899
11900
    /**
     * Decodes an UTF-8 string to ISO-8859-1.
     *
     * The string is first passed through a search/replace built from the keys
     * and values of the "win1252_to_utf8" data table, then walked byte by byte:
     * two-byte sequences are collapsed into a single byte (or "?" when the
     * resulting code point is 256 or above), three- and four-byte sequences
     * are replaced by "?", and every other byte is copied unchanged.
     *
     * @param string $str           <p>The input string.</p>
     * @param bool   $keepUtf8Chars <p>
     *                              If true, the string as it was before the byte-wise decoding is returned
     *                              whenever the decoded result is not shorter (measured with self::strlen())
     *                              than that string.
     *                              </p>
     *
     * @return string
     */
    public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // lazily load the byte lookup tables used by the decoding loop
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        // the search/replace lists are derived once per request from the shared table
        static $tableKeys = null;
        static $tableValues = null;

        if ($tableKeys === null) {
            if (self::$WIN1252_TO_UTF8 === null) {
                self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
            }

            $tableKeys = \array_keys(self::$WIN1252_TO_UTF8);
            $tableValues = \array_values(self::$WIN1252_TO_UTF8);
        }

        $str = \str_replace($tableKeys, $tableValues, $str);

        // saved for the later comparison
        $beforeDecoding = $str;
        $byteCount = \strlen($str);
        $placeholder = '?';

        // The output is written in place: the write position never overtakes the
        // read position, so bytes that are still to be read are never clobbered.
        $read = 0;
        $write = 0;
        while ($read < $byteCount) {
            $highNibble = $str[$read] & "\xF0";

            if ($highNibble === "\xC0" || $highNibble === "\xD0") {
                // two-byte sequence: 5 payload bits from the lead byte, 6 from the next byte
                $leadBits = self::$ORD[$str[$read] & "\x1F"] << 6;
                $codePoint = $leadBits | self::$ORD[$str[++$read] & "\x3F"];
                $str[$write] = $codePoint < 256 ? self::$CHR[$codePoint] : $placeholder;
            } elseif ($highNibble === "\xE0" || $highNibble === "\xF0") {
                // three-byte (0xE0) or four-byte (0xF0) sequence: skip the extra bytes
                $read += $highNibble === "\xF0" ? 3 : 2;
                $str[$write] = $placeholder;
            } else {
                $str[$write] = $str[$read];
            }

            ++$read;
            ++$write;
        }

        $decoded = \substr($str, 0, $write);
        if ($decoded === false) {
            $decoded = '';
        }

        if (
            $keepUtf8Chars === true
            &&
            self::strlen($decoded) >= (int) self::strlen($beforeDecoding)
        ) {
            return $beforeDecoding;
        }

        return $decoded;
    }
11983
11984
    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * The native "utf8_encode()" result is post-processed with a search/replace
     * built from the keys and values of the "win1252_to_utf8" data table, but
     * only when the encoded string contains a 0xC2 byte.
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $encoded = \utf8_encode($str);

        // the polyfill maybe return false
        /** @psalm-suppress TypeDoesNotContainType */
        if ($encoded === false) {
            return '';
        }

        // without a 0xC2 byte the table replacement is skipped entirely
        if (\strpos($encoded, "\xC2") === false) {
            return $encoded;
        }

        static $searchList = null;
        static $replaceList = null;

        if ($searchList === null) {
            if (self::$WIN1252_TO_UTF8 === null) {
                self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
            }

            $searchList = \array_keys(self::$WIN1252_TO_UTF8);
            $replaceList = \array_values(self::$WIN1252_TO_UTF8);
        }

        return \str_replace($searchList, $replaceList, $encoded);
    }
12024
12025
    /**
     * Fixes utf8-win1252 chars by delegating to "UTF8::fix_simple_utf8()".
     *
     * @param string $str <p>The input string.</p>
     *
     * @return string
     *
     * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        // kept only as an alias for backward compatibility
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
12038
12039
    /**
     * Returns an array with all utf8 whitespace characters.
     *
     * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @author: Derek E.
     *
     * @return string[]
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
     *                  as defined in above URL
     */
    public static function whitespace_table(): array
    {
        // hand out the class-level table
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
12054
12055
    /**
     * Limit the number of words in a string.
     *
     * A "word" is a run of non-whitespace characters. When the string holds
     * more than $limit words, it is cut after the last allowed word, trailing
     * whitespace is trimmed and $strAddOn is appended.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $limit    <p>The limit of words as integer.</p>
     * @param string $strAddOn <p>Replacement for the stripped string.</p>
     *
     * @return string
     *                <p>An empty string if the input is empty or $limit is smaller than 1.</p>
     */
    public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
    {
        if ($limit < 1 || $str === '') {
            return '';
        }

        // leading whitespace, followed by up to $limit "word + trailing whitespace" groups
        $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
        \preg_match($pattern, $str, $matches);

        // no match: hand the input back untouched
        if (!isset($matches[0])) {
            return $str;
        }

        $head = $matches[0];

        // the match already spans the whole string, so nothing was cut off
        if (\mb_strlen($str) === (int) \mb_strlen($head)) {
            return $str;
        }

        return \rtrim($head) . $strAddOn;
    }
12082
12083
    /**
     * Wraps a string to a given number of characters
     *
     * The wrapping itself is delegated to the native "wordwrap()", which is run
     * on a one-byte-per-character mask of the input ("?" for a character, " "
     * for a space, "#" for an existing break). The break positions found in the
     * mask are then applied to the real characters.
     *
     * @see  http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @return string
     *                <p>The given string wrapped at the specified column; an empty string
     *                if $str or $break is empty.</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($str === '' || $break === '') {
            return '';
        }

        $segments = \explode($break, $str);
        if ($segments === false) {
            return '';
        }

        // $units holds the real characters (and breaks), $mask one byte per unit
        $mask = '';
        $units = [];

        foreach ($segments as $index => $segment) {
            // every segment but the first was preceded by a break in the input
            if ($index) {
                $units[] = $break;
                $mask .= '#';
            }

            foreach (self::str_split($segment) as $char) {
                $units[] = $char;
                $mask .= $char === ' ' ? ' ' : '?';
            }
        }

        $wrappedMask = \wordwrap($mask, $width, '#', $cut);

        $result = '';
        $unitPos = 0;
        $maskPos = -1;
        $breakPos = -1;

        while (($breakPos = \mb_strpos($wrappedMask, '#', $breakPos + 1)) !== false) {
            // copy everything in front of the current break marker
            for (++$maskPos; $maskPos < $breakPos; ++$maskPos) {
                $result .= $units[$unitPos];
                unset($units[$unitPos++]);
            }

            // a break that replaced a space or an original break consumes that unit,
            // a break that was inserted into a long word does not
            if ($units[$unitPos] === $break || $units[$unitPos] === ' ') {
                unset($units[$unitPos++]);
            }

            $result .= $break;
        }

        // whatever is left belongs to the last line
        return $result . \implode('', $units);
    }
12152
12153
    /**
     * Line-Wrap the string after $limit, but also after the next word.
     *
     * The input is split on "\r\n", "\r" and "\n"; every resulting line is
     * wrapped on its own with the multi-byte aware self::wordwrap() and
     * terminated with "\n" (so the result always ends with a line feed).
     *
     * @param string $str
     * @param int    $limit <p>The column width handed to self::wordwrap().</p>
     *
     * @return string
     *                <p>An empty string if the input could not be split into lines.</p>
     */
    public static function wordwrap_per_line(string $str, int $limit): string
    {
        $lines = \preg_split('/\\r\\n|\\r|\\n/', $str);
        if ($lines === false) {
            return '';
        }

        $wrapped = '';
        foreach ($lines as $line) {
            // self::wordwrap() counts characters, the native wordwrap() would count bytes
            $wrapped .= self::wordwrap($line, $limit);
            $wrapped .= "\n";
        }

        return $wrapped;
    }
12177
12178
    /**
     * Returns an array of Unicode White Space characters.
     *
     * @return string[] an array with numeric code point as key and White Space Character as value
     */
    public static function ws(): array
    {
        // hand out the class-level list
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
12187
12188
    /**
     * Fills the emoji key / value / reversible-placeholder caches once.
     *
     * Does nothing when the key cache has already been built.
     *
     * @return void
     */
    private static function initEmojiData()
    {
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        // order the table so that keys with more bytes come first
        $longestKeyFirst = static function ($left, $right) {
            return \strlen($right) <=> \strlen($left);
        };
        \uksort(self::$EMOJI, $longestKeyFirst);

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = \array_values(self::$EMOJI);

        // one placeholder per key, built from the key's crc32 and its reversed digits
        foreach (self::$EMOJI_KEYS_CACHE as $emojiKey) {
            $checksum = \crc32($emojiKey);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev((string) $checksum) . '_-_8FTU_ELBATROP_-_';
        }
    }
12214
12215
    /**
     * Applies the case-folding replacement tables to a string.
     *
     * @param string $str
     * @param bool   $useLower     <p>Use uppercase by default, otherwise use lowercase.</p>
     * @param bool   $fullCaseFold <p>Convert not only common cases.</p>
     *
     * @return string
     */
    private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
    {
        // only a strict boolean true switches to lower-casing
        $toLower = $useLower === true;

        $upperList = self::$COMMON_CASE_FOLD['upper'];
        $lowerList = self::$COMMON_CASE_FOLD['lower'];

        $str = $toLower
            ? \str_replace($upperList, $lowerList, $str)
            : \str_replace($lowerList, $upperList, $str);

        if (!$fullCaseFold) {
            return $str;
        }

        // the full table is loaded once, on first use
        static $fullFoldTable = null;
        if ($fullFoldTable === null) {
            $fullFoldTable = self::getData('caseFolding_full');
        }

        if ($toLower) {
            return \str_replace($fullFoldTable[0], $fullFoldTable[1], $str);
        }

        return \str_replace($fullFoldTable[1], $fullFoldTable[0], $str);
    }
12256
12257
    /**
     * get data from "/data/*.php"
     *
     * Includes the file without checking that it exists first.
     *
     * @param string $file <p>The file name without directory and without the ".php" extension.</p>
     *
     * @return mixed whatever the included file returns
     */
    private static function getData(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }
12271
12272
    /**
     * get data from "/data/*.php"
     *
     * @param string $file <p>The file name without directory and without the ".php" extension.</p>
     *
     * @return false|mixed will return false on error
     */
    private static function getDataIfExists(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        // a missing data file is reported as false instead of triggering an include warning
        if (!\file_exists($path)) {
            return false;
        }

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        return include $path;
    }
12290
12291
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * @return bool
     *              <p>true only if the MB_OVERLOAD_STRING constant exists and its bit is set
     *              in the "mbstring.func_overload" INI value.</p>
     */
    private static function mbstring_overloaded(): bool
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */

        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $overloadFlags = (int) @\ini_get('mbstring.func_overload');

        return ($overloadFlags & \MB_OVERLOAD_STRING) !== 0;
    }
12308
12309
    /**
     * Filters a list of strings and returns the survivors re-indexed from 0.
     *
     * @param array $strings
     * @param bool  $removeEmptyValues <p>Drop values that are empty after "trim()".</p>
     * @param int   $removeShortValues <p>
     *                                 Drop values whose "mb_strlen()" is less than or equal to this number;
     *                                 null disables the length check.
     *                                 </p>
     *
     * @return array
     */
    private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
    {
        $kept = [];

        foreach ($strings as $candidate) {
            // the length check runs first, exactly like the blank check it precedes
            $tooShort = $removeShortValues !== null
                        &&
                        \mb_strlen($candidate) <= $removeShortValues;
            if ($tooShort) {
                continue;
            }

            $blank = $removeEmptyValues === true
                     &&
                     \trim($candidate) === '';
            if ($blank) {
                continue;
            }

            $kept[] = $candidate;
        }

        return $kept;
    }
12343
12344
    /**
     * Builds (and caches) a regular-expression fragment that matches any of the
     * characters in $s.
     *
     * Characters of at most two bytes are escaped with "preg_quote()" for the
     * "/" delimiter and collected into one bracket class together with $class;
     * a "-" is moved to the front of that class. Anything that is not a single
     * character is added as a separate alternative, which turns the result into
     * a "(?:...|...)" group.
     *
     * @param string $s     <p>The characters to match.</p>
     * @param string $class <p>Initial content of the bracket class; it is used as given, without quoting.</p>
     *
     * @return string
     */
    private static function rxClass(string $s, string $class = ''): string
    {
        // keyed by $class first and $s second, so that different argument pairs
        // can never share a cache slot (a plain "$s . $class" key could collide)
        static $RX_CLASSS_CACHE = [];

        if (isset($RX_CLASSS_CACHE[$class][$s])) {
            return $RX_CLASSS_CACHE[$class][$s];
        }

        // index 0 is the bracket-class body, further entries are alternatives
        $parts = [$class];

        // iterate by value: the split result is a temporary, and the parameter must stay intact
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // a leading "-" is always a literal inside a bracket class
                $parts[0] = '-' . $parts[0];
            } elseif (!isset($char[2])) {
                $parts[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                $parts[0] .= $char;
            } else {
                $parts[] = $char;
            }
        }

        if ($parts[0]) {
            $parts[0] = '[' . $parts[0] . ']';
        }

        if (\count($parts) === 1) {
            $return = $parts[0];
        } else {
            $return = '(?:' . \implode('|', $parts) . ')';
        }

        $RX_CLASSS_CACHE[$class][$s] = $return;

        return $return;
    }
12392
12393
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * The input is split on $delimiter and the first character of every part is
     * upper-cased, except for parts that are a listed particle (e.g. "van") or
     * that start with a listed prefix (e.g. "mc"). With "-" as delimiter, parts
     * that merely start with a listed particle are left untouched as well.
     *
     * @param string $names
     * @param string $delimiter
     * @param string $encoding  <p>Only handed to the prefix look-ups done with self::strpos().</p>
     *
     * @return string
     */
    private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
    {
        $parts = \explode($delimiter, $names);

        if ($parts === false) {
            return '';
        }

        // particles that stay lowercase when they form a whole part
        $particles = [
            'ab',
            'af',
            'al',
            'and',
            'ap',
            'bint',
            'binte',
            'da',
            'de',
            'del',
            'den',
            'der',
            'di',
            'dit',
            'ibn',
            'la',
            'mac',
            'nic',
            'of',
            'ter',
            'the',
            'und',
            'van',
            'von',
            'y',
            'zu',
        ];

        // parts starting with one of these are left as they are
        $prefixes = [
            'al-',
            "d'",
            'ff',
            "l'",
            'mac',
            'mc',
            'nic',
        ];

        foreach ($parts as $index => $part) {
            if (\in_array($part, $particles, true)) {
                continue;
            }

            $skip = false;

            if ($delimiter === '-') {
                foreach ($particles as $particle) {
                    if (self::strpos($part, $particle, 0, $encoding) === 0) {
                        $skip = true;
                    }
                }
            }

            foreach ($prefixes as $prefix) {
                if (self::strpos($part, $prefix, 0, $encoding) === 0) {
                    $skip = true;
                }
            }

            if ($skip === true) {
                continue;
            }

            $parts[$index] = self::ucfirst($part);
        }

        return \implode($delimiter, $parts);
    }
12483
12484
    /**
     * Generic case sensitive transformation for collation matching.
     *
     * The string is normalized to NFD and every run of non-spacing marks
     * (\p{Mn}) is removed afterwards.
     *
     * @param string $str <p>The input string</p>
     *
     * @return string|null
     */
    private static function strtonatfold(string $str)
    {
        // decompose first, so that accents become separate marks that can be stripped
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        return \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
12495
12496
    /**
     * Converts a single input byte into its UTF-8 representation.
     *
     * The ordinal of the input is looked up in the "ord" table. If the
     * "win1252_to_utf8" table has an entry for that ordinal, the entry is
     * returned; otherwise a two-byte sequence is built from the input.
     *
     * @param int|string $input
     *
     * @return string
     */
    private static function to_utf8_convert_helper($input): string
    {
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];

        // found in Windows-1252 special cases
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) {
            return (string) self::$WIN1252_TO_UTF8[$ordC1];
        }

        // Lead byte: the bits above the low six, marked with 0xC0. The shift keeps
        // the table index an integer; a division by 64 would yield a fractional
        // float key, whose implicit truncation is deprecated since PHP 8.1.
        $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";

        // Continuation byte: the low six bits, marked with 0x80. Both operators
        // work byte-wise here because both operands are strings.
        $cc2 = (((string) $input) & "\x3F") | "\x80";

        return $cc1 . $cc2;
    }
12529
}
12530